Java Code Examples for htsjdk.variant.variantcontext.writer.VariantContextWriter#writeHeader()

The following examples show how to use htsjdk.variant.variantcontext.writer.VariantContextWriter#writeHeader(). The examples are extracted from open source projects; you can go to the original project or source file by following the links above each example.
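Before the project-specific examples, here is a minimal, self-contained sketch of the pattern they all share: build a writer with VariantContextWriterBuilder, assemble a VCFHeader, call writeHeader() exactly once, then add records and close the writer. This sketch is not taken from any of the projects below; the output path, sample name, and class name are hypothetical and chosen only for illustration, and on-the-fly indexing is left disabled because no reference sequence dictionary is supplied.

import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFStandardHeaderLines;

import java.io.File;
import java.util.Collections;
import java.util.Date;
import java.util.LinkedHashSet;
import java.util.Set;

public class WriteHeaderSketch {
    public static void main(String[] args) {
        // Hypothetical output path and sample name, used purely for illustration.
        final File outputVcf = new File("example-output.vcf");

        // Build the writer; on-the-fly indexing is disabled because this sketch
        // does not supply a reference sequence dictionary.
        final VariantContextWriter writer = new VariantContextWriterBuilder()
                .setOutputFile(outputVcf)
                .unsetOption(Options.INDEX_ON_THE_FLY)
                .build();

        // Assemble header meta-data lines plus one standard FORMAT line.
        final Set<VCFHeaderLine> metaData = new LinkedHashSet<>();
        metaData.add(new VCFHeaderLine("source", "WriteHeaderSketch"));
        metaData.add(new VCFHeaderLine("fileDate", new Date().toString()));
        metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));

        final VCFHeader header = new VCFHeader(metaData, Collections.singletonList("SAMPLE1"));

        // The header must be written once, before any calls to writer.add(...).
        writer.writeHeader(header);
        writer.close();
    }
}

The project examples that follow build on this same skeleton, adding genotype and INFO header lines, sequence dictionaries, and indexing options as needed.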
Example 1
Source File: AmberVCF.java    From hmftools with GNU General Public License v3.0
public void writeBAF(@NotNull final String filename, @NotNull final Collection<TumorBAF> tumorEvidence,  @NotNull final AmberHetNormalEvidence hetNormalEvidence) {
    final List<TumorBAF> list = Lists.newArrayList(tumorEvidence);
    Collections.sort(list);

    final VariantContextWriter writer =
            new VariantContextWriterBuilder().setOutputFile(filename).modifyOption(Options.INDEX_ON_THE_FLY, true).build();
    final VCFHeader header = header(config.tumorOnly() ? Collections.singletonList(config.tumor()) : config.allSamples());
    writer.setHeader(header);
    writer.writeHeader(header);

    final ListMultimap<AmberSite, Genotype> genotypeMap = ArrayListMultimap.create();
    for (final String sample : hetNormalEvidence.samples()) {
        for (BaseDepth baseDepth : hetNormalEvidence.evidence(sample)) {
            genotypeMap.put(AmberSiteFactory.asSite(baseDepth), createGenotype(sample, baseDepth));
        }
    }

    for (final TumorBAF tumorBAF : list) {
        AmberSite tumorSite = AmberSiteFactory.tumorSite(tumorBAF);
        genotypeMap.put(tumorSite, createGenotype(tumorBAF));
        writer.add(create(tumorBAF, genotypeMap.get(tumorSite)));
    }

    writer.close();
}
 
Example 2
Source File: GenotypeConcordance.java    From picard with MIT License
/** Gets the variant context writer if the output VCF is to be written, otherwise empty. */
private Optional<VariantContextWriter> getVariantContextWriter(final VCFFileReader truthReader, final VCFFileReader callReader) {
    if (OUTPUT_VCF) {
        final File outputVcfFile = new File(OUTPUT + OUTPUT_VCF_FILE_EXTENSION);
        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                .setOutputFile(outputVcfFile)
                .setReferenceDictionary(callReader.getFileHeader().getSequenceDictionary())
                .setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER)
                .setOption(Options.INDEX_ON_THE_FLY);
        final VariantContextWriter writer = builder.build();

        // create the output header
        final List<String> sampleNames = Arrays.asList(OUTPUT_VCF_CALL_SAMPLE_NAME, OUTPUT_VCF_TRUTH_SAMPLE_NAME);
        final Set<VCFHeaderLine> headerLines = new HashSet<>();
        headerLines.addAll(callReader.getFileHeader().getMetaDataInInputOrder());
        headerLines.addAll(truthReader.getFileHeader().getMetaDataInInputOrder());
        headerLines.add(CONTINGENCY_STATE_HEADER_LINE);
        writer.writeHeader(new VCFHeader(headerLines, sampleNames));
        return Optional.of(writer);
    }
    else {
        return Optional.empty();
    }
}
 
Example 3
Source File: FilterVariantTranches.java    From gatk with BSD 3-Clause "New" or "Revised" License
private void writeVCFHeader(VariantContextWriter vcfWriter) {
    // setup the header fields
    final VCFHeader inputHeader = getHeaderForVariants();
    Set<VCFHeaderLine> hInfo = new LinkedHashSet<VCFHeaderLine>();
    hInfo.addAll(inputHeader.getMetaDataInSortedOrder());

    boolean hasInfoKey = hInfo.stream().anyMatch(
            x -> x instanceof VCFInfoHeaderLine && ((VCFInfoHeaderLine) x).getID().equals(infoKey));
    if (!hasInfoKey){
        throw new UserException(String.format("Input VCF does not contain a header line for specified info key:%s", infoKey));
    }

    if (removeOldFilters){
        hInfo.removeIf(x -> x instanceof VCFFilterHeaderLine);
    }

    addTrancheHeaderFields(SNPString, snpTranches, hInfo);
    addTrancheHeaderFields(INDELString, indelTranches, hInfo);

    final TreeSet<String> samples = new TreeSet<>();
    samples.addAll(inputHeader.getGenotypeSamples());
    hInfo.addAll(getDefaultToolVCFHeaderLines());
    final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
    vcfWriter.writeHeader(vcfHeader);
}
 
Example 4
Source File: MNVValidatorApplication.java    From hmftools with GNU General Public License v3.0
private static void processVariants(boolean strelka, @NotNull final String filePath, @NotNull final String outputVcf,
        @NotNull final String tumorBam) {
    final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false);
    final VCFHeader outputHeader = generateOutputHeader(vcfReader.getFileHeader(), "TUMOR");
    final VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVcf)
            .setReferenceDictionary(vcfReader.getFileHeader().getSequenceDictionary())
            .build();
    vcfWriter.writeHeader(outputHeader);
    final MNVValidator validator = ImmutableMNVValidator.of(tumorBam);
    final MNVMerger merger = ImmutableMNVMerger.of(outputHeader);
    Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair = ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
    for (final VariantContext rawVariant : vcfReader) {
        final VariantContext simplifiedVariant =
                strelka ? StrelkaPostProcess.simplifyVariant(rawVariant, StrelkaPostProcess.TUMOR_GENOTYPE) : rawVariant;

        final PotentialMNVRegion potentialMNV = outputPair.getLeft();
        outputPair = MNVDetector.addMnvToRegion(potentialMNV, simplifiedVariant);
        outputPair.getRight().ifPresent(mnvRegion -> validator.mergeVariants(mnvRegion, merger).forEach(vcfWriter::add));
    }
    validator.mergeVariants(outputPair.getLeft(), merger).forEach(vcfWriter::add);
    vcfWriter.close();
    vcfReader.close();
    LOGGER.info("Written output variants to " + outputVcf);
}
 
Example 5
Source File: HMMPostProcessor.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
/**
 * For each segment in genotypingSegments, compose the variant context and write it to outputWriter
 *
 * @param genotypingSegments a list of genotyping segments
 * @param outputWriter a VCF writer
 * @param variantPrefix a prefix for composing variant IDs
 * @param commandLine (optional) command line used to generate the data
 */
private void composeVariantContextAndWrite(@Nonnull final List<GenotypingSegment> genotypingSegments,
                                           @Nonnull final VariantContextWriter outputWriter,
                                           @Nonnull final String variantPrefix,
                                           @Nullable final String commandLine) {
    outputWriter.writeHeader(composeHeader(commandLine));
    int counter = 0;
    int prevReportedDonePercentage = -1;
    for (final GenotypingSegment segment : genotypingSegments) {
        final int donePercentage = (int)(100 * counter / (double)genotypingSegments.size());
        if (donePercentage % 10 == 0 && prevReportedDonePercentage != donePercentage) {
            logger.info(String.format("%d%% done...", donePercentage));
            prevReportedDonePercentage = donePercentage;
        }
        final VariantContext variant = composeVariantContext(segment, variantPrefix);
        counter++;
        outputWriter.add(variant);
    }
    logger.info("100% done.");
}
 
Example 6
Source File: FingerprintUtils.java    From picard with MIT License
private static VariantContextWriter getVariantContextWriter(final File outputFile,
                                                            final File referenceSequenceFileName,
                                                            final String sample,
                                                            final String source,
                                                            final ReferenceSequenceFile ref) {
    final VariantContextWriter variantContextWriter = new VariantContextWriterBuilder()
            .setReferenceDictionary(ref.getSequenceDictionary())
            .setOutputFile(outputFile).build();

    final Set<VCFHeaderLine> lines = new LinkedHashSet<>();
    lines.add(new VCFHeaderLine("reference", referenceSequenceFileName.getAbsolutePath()));
    lines.add(new VCFHeaderLine("source", source));
    lines.add(new VCFHeaderLine("fileDate", new Date().toString()));

    lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_PL_KEY));
    lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS));
    lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));

    final VCFHeader header = new VCFHeader(lines, Collections.singletonList(sample));
    header.setSequenceDictionary(ref.getSequenceDictionary());
    variantContextWriter.writeHeader(header);
    return variantContextWriter;
}
 
Example 7
Source File: SortVcf.java    From picard with MIT License
private void writeSortedOutput(final VCFHeader outputHeader, final SortingCollection<VariantContext> sortedOutput) {
    final ProgressLogger writeProgress = new ProgressLogger(log, 25000, "wrote", "records");
    final EnumSet<Options> options = CREATE_INDEX ? EnumSet.of(Options.INDEX_ON_THE_FLY) : EnumSet.noneOf(Options.class);
    final VariantContextWriter out = new VariantContextWriterBuilder().
            setReferenceDictionary(outputHeader.getSequenceDictionary()).
            setOptions(options).
            setOutputFile(OUTPUT).build();
    out.writeHeader(outputHeader);
    for (final VariantContext variantContext : sortedOutput) {
        out.add(variantContext);
        writeProgress.record(variantContext.getContig(), variantContext.getStart());
    }
    out.close();
}
 
Example 8
Source File: RenameSampleInVcf.java    From picard with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final VCFFileReader in = new VCFFileReader(INPUT, false);
    final VCFHeader header = in.getFileHeader();

    if (header.getGenotypeSamples().size() > 1) {
        throw new IllegalArgumentException("Input VCF must be single-sample.");
    }

    if (OLD_SAMPLE_NAME != null && !OLD_SAMPLE_NAME.equals(header.getGenotypeSamples().get(0))) {
        throw new IllegalArgumentException("Input VCF did not contain expected sample. Contained: " + header.getGenotypeSamples().get(0));
    }

    final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
    if (CREATE_INDEX) options.add(Options.INDEX_ON_THE_FLY); else options.remove(Options.INDEX_ON_THE_FLY);

    final VCFHeader outHeader = new VCFHeader(header.getMetaDataInInputOrder(), CollectionUtil.makeList(NEW_SAMPLE_NAME));
    final VariantContextWriter out = new VariantContextWriterBuilder()
            .setOptions(options)
            .setOutputFile(OUTPUT).setReferenceDictionary(outHeader.getSequenceDictionary()).build();
    out.writeHeader(outHeader);

    for (final VariantContext ctx : in) {
        out.add(ctx);
    }

    out.close();
    in.close();

    return 0;
}
 
Example 9
Source File: GATKVariantContextUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
private void writeHeader(final VariantContextWriter writer) {
    final Set<VCFHeaderLine> metaData = new HashSet<>();
    metaData.add(new VCFHeaderLine(
            VCFHeaderVersion.VCF4_2.getFormatString(),
            VCFHeaderVersion.VCF4_2.getVersionString()));
    final VCFHeader vcfHeader = new VCFHeader(metaData, Collections.emptyList());
    vcfHeader.setSequenceDictionary(makeSimpleSequenceDictionary());
    writer.writeHeader(vcfHeader);
}
 
Example 10
Source File: GenotypeGVCFsEngine.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Creates the VCF header and writes it to the provided writer.
 *
 * @param vcfWriter the writer to which the header will be written
 * @return the same VCF writer, with its header written
 */
public VariantContextWriter setupVCFWriter(Set<VCFHeaderLine> defaultToolVCFHeaderLines, boolean keepCombined, DbsnpArgumentCollection dbsnp, VariantContextWriter vcfWriter) {
    final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputVCFHeader.getMetaDataInInputOrder());
    headerLines.addAll(defaultToolVCFHeaderLines);

    // Remove GVCF block header lines
    headerLines.removeIf(vcfHeaderLine -> vcfHeaderLine.getKey().startsWith(GVCF_BLOCK));

    headerLines.addAll(annotationEngine.getVCFAnnotationDescriptions(false));
    headerLines.addAll(genotypingEngine.getAppropriateVCFInfoHeaders());

    // add headers for annotations added by this tool
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
    headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.REFERENCE_GENOTYPE_QUALITY));
    headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));   // needed for gVCFs without DP tags
    if (keepCombined) {
        headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_QUAL_KEY));
        headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY));
    }
    if ( dbsnp.dbsnp != null  ) {
        VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY);
    }

    final Set<String> sampleNameSet = samples.asSetOfSamples();
    outputHeader = new VCFHeader(headerLines, new TreeSet<>(sampleNameSet));
    vcfWriter.writeHeader(outputHeader);

    return vcfWriter;
}
 
Example 11
Source File: UpdateVcfSequenceDictionary.java    From picard with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath());

    final VCFFileReader fileReader = new VCFFileReader(INPUT, false);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(samSequenceDictionary)
            .clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);

    final VariantContextWriter vcfWriter = builder.setOutputFile(OUTPUT).build();
    fileHeader.setSequenceDictionary(samSequenceDictionary);
    vcfWriter.writeHeader(fileHeader);

    final ProgressLogger progress = new ProgressLogger(log, 10000);
    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        vcfWriter.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(fileReader);
    vcfWriter.close();

    return 0;
}
 
Example 12
Source File: StrelkaPostProcessApplication.java    From hmftools with GNU General Public License v3.0
private static void processVariants(@NotNull final String filePath, @NotNull final Slicer highConfidenceSlicer,
        @NotNull final String outputVcf, @NotNull final String sampleName, @NotNull final String tumorBam) {
    final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false);
    final VCFHeader outputHeader = generateOutputHeader(vcfReader.getFileHeader(), sampleName);
    final VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(outputVcf)
            .setReferenceDictionary(outputHeader.getSequenceDictionary())
            .build();
    writer.writeHeader(outputHeader);
    final MNVValidator validator = ImmutableMNVValidator.of(tumorBam);
    final MNVMerger merger = ImmutableMNVMerger.of(outputHeader);

    Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair = ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());

    final VariantContextFilter filter = new StrelkaPostProcess(highConfidenceSlicer);
    for (final VariantContext variantContext : vcfReader) {
        if (filter.test(variantContext)) {
            final VariantContext simplifiedVariant = StrelkaPostProcess.simplifyVariant(variantContext, sampleName);
            final PotentialMNVRegion potentialMNV = outputPair.getLeft();
            outputPair = MNVDetector.addMnvToRegion(potentialMNV, simplifiedVariant);
            outputPair.getRight().ifPresent(mnvRegion -> validator.mergeVariants(mnvRegion, merger).forEach(writer::add));
        }
    }
    validator.mergeVariants(outputPair.getLeft(), merger).forEach(writer::add);
    writer.close();
    vcfReader.close();
    LOGGER.info("Written output variants to " + outputVcf);
}
 
Example 13
Source File: MNVDetectorApplication.java    From hmftools with GNU General Public License v3.0
private static void processVariants(@NotNull final String filePath, @NotNull final String outputVcf, @NotNull final String outputBed,
        boolean strelka) throws IOException {
    final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false);
    final VCFHeader outputHeader =
            strelka ? generateOutputHeader(vcfReader.getFileHeader(), StrelkaPostProcess.TUMOR_GENOTYPE) : vcfReader.getFileHeader();
    final BufferedWriter bedWriter = new BufferedWriter(new FileWriter(outputBed, false));
    final VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVcf)
            .setReferenceDictionary(outputHeader.getSequenceDictionary())
            .build();
    vcfWriter.writeHeader(outputHeader);

    Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair = ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
    for (final VariantContext rawVariant : vcfReader) {
        final VariantContext variant =
                strelka ? StrelkaPostProcess.simplifyVariant(rawVariant, StrelkaPostProcess.TUMOR_GENOTYPE) : rawVariant;

        final PotentialMNVRegion potentialMNVregion = outputPair.getLeft();
        outputPair = MNVDetector.addMnvToRegion(potentialMNVregion, variant);
        outputPair.getRight()
                .ifPresent(mnvRegion -> filterMnvRegion(mnvRegion).ifPresent(filteredRegion -> writeMnvRegionToFiles(filteredRegion,
                        vcfWriter,
                        bedWriter,
                        "\n")));
    }
    filterMnvRegion(outputPair.getLeft()).ifPresent(mnvRegion -> writeMnvRegionToFiles(mnvRegion, vcfWriter, bedWriter, ""));
    vcfWriter.close();
    vcfReader.close();
    bedWriter.close();
    LOGGER.info("Written output variants to {}. Written bed regions to {}.", outputVcf, outputBed);
}
 
Example 14
Source File: CombineGVCFs.java    From gatk with BSD 3-Clause "New" or "Revised" License
private VariantContextWriter getVCFWriter() {
    final SortedSet<String> samples = getSamplesForVariants();

    final VCFHeader inputVCFHeader = new VCFHeader(getHeaderForVariants().getMetaDataInInputOrder(), samples);

    final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputVCFHeader.getMetaDataInInputOrder());
    headerLines.addAll(getDefaultToolVCFHeaderLines());

    headerLines.addAll(annotationEngine.getVCFAnnotationDescriptions());

    // add headers for annotations added by this tool
    headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));   // needed for gVCFs without DP tags
    if ( dbsnp.dbsnp != null  ) {
        VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY);
    }

    if (somaticInput) {
        //single-sample M2 variant filter status will get moved to genotype filter
        headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));

        if (!dropSomaticFilteringAnnotations) {
            //standard M2 INFO annotations for filtering will get moved to FORMAT field
            for (final String key : Mutect2FilteringEngine.STANDARD_MUTECT_INFO_FIELDS_FOR_FILTERING) {
                headerLines.add(GATKVCFHeaderLines.getEquivalentFormatHeaderLine(key));
            }
        }
    }

    VariantContextWriter writer = createVCFWriter(outputFile);

    final Set<String> sampleNameSet = new IndexedSampleList(samples).asSetOfSamples();
    final VCFHeader vcfHeader = new VCFHeader(headerLines, new TreeSet<>(sampleNameSet));
    writer.writeHeader(vcfHeader);

    return writer;
}
 
Example 15
Source File: AmberVCF.java    From hmftools with GNU General Public License v3.0
void writeSNPCheck(@NotNull final String filename, @NotNull final List<BaseDepth> baseDepths) {
    final List<BaseDepth> list = Lists.newArrayList(baseDepths);
    Collections.sort(list);

    final VariantContextWriter writer =
            new VariantContextWriterBuilder().setOutputFile(filename).modifyOption(Options.INDEX_ON_THE_FLY, true).build();
    final VCFHeader header = header(Lists.newArrayList(config.primaryReference()));
    writer.setHeader(header);
    writer.writeHeader(header);

    list.forEach(x -> writer.add(create(x)));
    writer.close();
}
 
Example 16
Source File: AmberVCF.java    From hmftools with GNU General Public License v3.0
void writeContamination(@NotNull final String filename, @NotNull final Collection<TumorContamination> evidence) {
    final List<TumorContamination> list = Lists.newArrayList(evidence);
    Collections.sort(list);

    final VariantContextWriter writer =
            new VariantContextWriterBuilder().setOutputFile(filename).modifyOption(Options.INDEX_ON_THE_FLY, true).build();
    final VCFHeader header = header(Lists.newArrayList(config.primaryReference(), config.tumor()));
    writer.setHeader(header);
    writer.writeHeader(header);

    list.forEach(x -> writer.add(create(x)));
    writer.close();
}
 
Example 17
Source File: FilterVcf.java    From picard with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    VCFFileReader in = null;
    VariantContextWriter out = null;
    try {// try/finally used to close 'in' and 'out'
        in = new VCFFileReader(INPUT, false);
        final List<VariantFilter> variantFilters = new ArrayList<>(4);
        variantFilters.add(new AlleleBalanceFilter(MIN_AB));
        variantFilters.add(new FisherStrandFilter(MAX_FS));
        variantFilters.add(new QdFilter(MIN_QD));
        if (JAVASCRIPT_FILE != null) {
            try {
                variantFilters.add(new VariantContextJavascriptFilter(JAVASCRIPT_FILE, in.getFileHeader()));
            } catch (final IOException error) {
                throw new PicardException("javascript-related error", error);
            }
        }
        final List<GenotypeFilter> genotypeFilters = CollectionUtil.makeList(new GenotypeQualityFilter(MIN_GQ), new DepthFilter(MIN_DP));
        final FilterApplyingVariantIterator iterator = new FilterApplyingVariantIterator(in.iterator(), variantFilters, genotypeFilters);

        final VCFHeader header = in.getFileHeader();
        // If the user is writing to a .bcf or .vcf, VariantContextWriterBuilder requires a sequence dictionary. Make sure that the
        // input VCF has one.
        final VariantContextWriterBuilder variantContextWriterBuilder = new VariantContextWriterBuilder();
        if (isVcfOrBcf(OUTPUT)) {
            final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
            if (sequenceDictionary == null) {
                throw new PicardException("The input vcf must have a sequence dictionary in order to create indexed vcf or bcfs.");
            }
            variantContextWriterBuilder.setReferenceDictionary(sequenceDictionary);
        }
        out = variantContextWriterBuilder.setOutputFile(OUTPUT).build();
        header.addMetaDataLine(new VCFFilterHeaderLine("AllGtsFiltered", "Site filtered out because all genotypes are filtered out."));
        header.addMetaDataLine(new VCFFormatHeaderLine("FT", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Genotype filters."));
        for (final VariantFilter filter : variantFilters) {
            filter.headerLines().forEach(header::addMetaDataLine);
        }

        out.writeHeader(in.getFileHeader());

        while (iterator.hasNext()) {
            final VariantContext vc = iterator.next();
            progress.record(vc.getContig(), vc.getStart());
            out.add(vc);
        }
        return 0;
    } finally {
        CloserUtil.close(out);
        CloserUtil.close(in);
    }
}
 
Example 18
Source File: VcfOutputRenderer.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Create a {@link VcfOutputRenderer}.
 *
 * @param vcfWriter a pre-initialized {@link VariantContextWriter} used for writing the output (must not be null).
 * @param dataSources {@link List} of {@link DataSourceFuncotationFactory} to back our annotations (must not be null).
 * @param existingHeader {@link VCFHeader} of input VCF file to preserve (must not be null).
 * @param unaccountedForDefaultAnnotations {@link LinkedHashMap} of default annotations that must be added (must not be null).
 * @param unaccountedForOverrideAnnotations {@link LinkedHashMap} of override annotations that must be added (must not be null).
 * @param defaultToolVcfHeaderLines Lines to add to the header with information about the tool (must not be null).
 * @param excludedOutputFields Fields that should not be rendered in the final output. Only exact name matches will be excluded (must not be null).
 * @param toolVersion The version number of the tool used to produce the VCF file (must not be null).
 */
public VcfOutputRenderer(final VariantContextWriter vcfWriter,
                         final List<DataSourceFuncotationFactory> dataSources,
                         final VCFHeader existingHeader,
                         final LinkedHashMap<String, String> unaccountedForDefaultAnnotations,
                         final LinkedHashMap<String, String> unaccountedForOverrideAnnotations,
                         final Set<VCFHeaderLine> defaultToolVcfHeaderLines,
                         final Set<String> excludedOutputFields,
                         final String toolVersion) {
    super(toolVersion);

    Utils.nonNull(vcfWriter);
    Utils.nonNull(dataSources);
    Utils.nonNull(existingHeader);
    Utils.nonNull(unaccountedForDefaultAnnotations);
    Utils.nonNull(unaccountedForOverrideAnnotations);
    Utils.nonNull(defaultToolVcfHeaderLines);
    Utils.nonNull(excludedOutputFields);

    this.vcfWriter = vcfWriter;
    this.existingHeader = existingHeader;
    this.dataSourceFactories = dataSources;

    // Merge the annotations into our manualAnnotations:
    manualAnnotations = new LinkedHashMap<>();
    manualAnnotations.putAll(unaccountedForDefaultAnnotations);
    manualAnnotations.putAll(unaccountedForOverrideAnnotations);

    // Get our default tool VCF header lines:
    this.defaultToolVcfHeaderLines = new LinkedHashSet<>(defaultToolVcfHeaderLines);

    // Please note that this assumes that there is no conversion between the name given by the datasource (or user)
    //  and the output name.
    finalFuncotationFieldNames = Stream.concat(getDataSourceFieldNamesForHeaderAsList(dataSourceFactories).stream(), manualAnnotations.keySet().stream())
            .filter(f -> !excludedOutputFields.contains(f))
            .collect(Collectors.toList());

    // Open the output file and set up the header:
    final VCFHeader newHeader = createVCFHeader();
    vcfWriter.writeHeader(newHeader);
}
 
Example 19
Source File: HaplotypeCallerEngine.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Writes an appropriate VCF header, given our arguments, to the provided writer
 *
 * @param vcfWriter writer to which the header should be written
 */
public void writeHeader( final VariantContextWriter vcfWriter, final SAMSequenceDictionary sequenceDictionary,
                         final Set<VCFHeaderLine>  defaultToolHeaderLines) {
    Utils.nonNull(vcfWriter);
    vcfWriter.writeHeader(makeVCFHeader(sequenceDictionary, defaultToolHeaderLines));
}
 
Example 20
Source File: MergeVcfs.java    From picard with MIT License
@Override
protected int doWork() {
    final ProgressLogger progress = new ProgressLogger(log, 10000);
    final List<String> sampleList = new ArrayList<String>();
    INPUT = IOUtil.unrollFiles(INPUT, IOUtil.VCF_EXTENSIONS);
    final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<CloseableIterator<VariantContext>>(INPUT.size());
    final Collection<VCFHeader> headers = new HashSet<VCFHeader>(INPUT.size());
    VariantContextComparator variantContextComparator = null;
    SAMSequenceDictionary sequenceDictionary = null;

    if (SEQUENCE_DICTIONARY != null) {
        sequenceDictionary = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SEQUENCE_DICTIONARY).getFileHeader().getSequenceDictionary();
    }

    for (final File file : INPUT) {
        IOUtil.assertFileIsReadable(file);
        final VCFFileReader fileReader = new VCFFileReader(file, false);
        final VCFHeader fileHeader = fileReader.getFileHeader();
        if (fileHeader.getContigLines().isEmpty()) {
            if (sequenceDictionary == null) {
                throw new IllegalArgumentException(SEQ_DICT_REQUIRED);
            } else {
                fileHeader.setSequenceDictionary(sequenceDictionary);
            }
        }

        if (variantContextComparator == null) {
            variantContextComparator = fileHeader.getVCFRecordComparator();
        } else {
            if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
                throw new IllegalArgumentException(
                        "The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
            }
        }

        if (sequenceDictionary == null) sequenceDictionary = fileHeader.getSequenceDictionary();

        if (sampleList.isEmpty()) {
            sampleList.addAll(fileHeader.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
            }
        }
        
        // add comments in the first header
        if (headers.isEmpty()) {
            COMMENT.stream().forEach(C -> fileHeader.addMetaDataLine(new VCFHeaderLine("MergeVcfs.comment", C)));
        }

        headers.add(fileHeader);
        iteratorCollection.add(fileReader.iterator());
    }

    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException(String.format("Index creation failed. %s", SEQ_DICT_REQUIRED));
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);

    if (CREATE_INDEX) {
        builder.setOption(Options.INDEX_ON_THE_FLY);
    } else {
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    }
    final VariantContextWriter writer = builder.build();

    writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));

    final MergingIterator<VariantContext> mergingIterator = new MergingIterator<VariantContext>(variantContextComparator, iteratorCollection);
    while (mergingIterator.hasNext()) {
        final VariantContext context = mergingIterator.next();
        writer.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(mergingIterator);
    writer.close();
    return 0;
}