htsjdk.variant.variantcontext.writer.Options Java Examples

The following examples show how to use htsjdk.variant.variantcontext.writer.Options. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AmberVCF.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
public void writeBAF(@NotNull final String filename, @NotNull final Collection<TumorBAF> tumorEvidence,  @NotNull final AmberHetNormalEvidence hetNormalEvidence) {
    final List<TumorBAF> list = Lists.newArrayList(tumorEvidence);
    Collections.sort(list);

    final VariantContextWriter writer =
            new VariantContextWriterBuilder().setOutputFile(filename).modifyOption(Options.INDEX_ON_THE_FLY, true).build();
    final VCFHeader header = header(config.tumorOnly() ? Collections.singletonList(config.tumor()) : config.allSamples());
    writer.setHeader(header);
    writer.writeHeader(header);

    final ListMultimap<AmberSite, Genotype> genotypeMap = ArrayListMultimap.create();
    for (final String sample : hetNormalEvidence.samples()) {
        for (BaseDepth baseDepth : hetNormalEvidence.evidence(sample)) {
            genotypeMap.put(AmberSiteFactory.asSite(baseDepth), createGenotype(sample, baseDepth));
        }
    }

    for (final TumorBAF tumorBAF : list) {
        AmberSite tumorSite = AmberSiteFactory.tumorSite(tumorBAF);
        genotypeMap.put(tumorSite, createGenotype(tumorBAF));
        writer.add(create(tumorBAF, genotypeMap.get(tumorSite)));
    }

    writer.close();
}
 
Example #2
Source File: GATKVariantContextUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(dataProvider = "createVCFWriterLenientData", expectedExceptions = IllegalStateException.class)
public void testCreateVCFWriterLenientFalse(
        final String outputExtension,
        final String indexExtension, // unused
        final boolean createIndex,
        final boolean createMD5) throws IOException {

    final File tmpDir = createTempDir("createVCFTest");
    final File outputFile = new File(tmpDir.getAbsolutePath(), "createVCFTest" + outputExtension);

    Options options[] = createIndex ?
            new Options[] {Options.INDEX_ON_THE_FLY} :
            new Options[] {};
    try (final VariantContextWriter vcw = GATKVariantContextUtils.createVCFWriter(
            outputFile.toPath(),
            makeSimpleSequenceDictionary(),
            createMD5,
            options)) {
        writeHeader(vcw);
        writeBadVariant(vcw); // write a bad attribute and throw...
    }
}
 
Example #3
Source File: GenotypeConcordance.java    From picard with MIT License 6 votes vote down vote up
/** Gets the variant context writer if the output VCF is to be written, otherwise empty. */
private Optional<VariantContextWriter> getVariantContextWriter(final VCFFileReader truthReader, final VCFFileReader callReader) {
    if (OUTPUT_VCF) {
        final File outputVcfFile = new File(OUTPUT + OUTPUT_VCF_FILE_EXTENSION);
        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                .setOutputFile(outputVcfFile)
                .setReferenceDictionary(callReader.getFileHeader().getSequenceDictionary())
                .setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER)
                .setOption(Options.INDEX_ON_THE_FLY);
        final VariantContextWriter writer = builder.build();

        // create the output header
        final List<String> sampleNames = Arrays.asList(OUTPUT_VCF_CALL_SAMPLE_NAME, OUTPUT_VCF_TRUTH_SAMPLE_NAME);
        final Set<VCFHeaderLine> headerLines = new HashSet<>();
        headerLines.addAll(callReader.getFileHeader().getMetaDataInInputOrder());
        headerLines.addAll(truthReader.getFileHeader().getMetaDataInInputOrder());
        headerLines.add(CONTINGENCY_STATE_HEADER_LINE);
        writer.writeHeader(new VCFHeader(headerLines, sampleNames));
        return Optional.of(writer);
    }
    else {
        return Optional.empty();
    }
}
 
Example #4
Source File: PurpleStructuralVariantSupplier.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
void write(@NotNull final PurityAdjuster purityAdjuster, @NotNull final List<PurpleCopyNumber> copyNumbers) throws IOException {
    if (header.isPresent()) {
        try (final IndexedFastaSequenceFile indexedFastaSequenceFile = new IndexedFastaSequenceFile(new File(refGenomePath));
                final VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(outputVCF)
                        .setReferenceDictionary(header.get().getSequenceDictionary())
                        .setIndexCreator(new TabixIndexCreator(header.get().getSequenceDictionary(), new TabixFormat()))
                        .setOutputFileType(VariantContextWriterBuilder.OutputType.BLOCK_COMPRESSED_VCF)
                        .setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER)
                        .build()) {

            final StructuralRefContextEnrichment refEnricher =
                    new StructuralRefContextEnrichment(indexedFastaSequenceFile, writer::add);
            writer.writeHeader(refEnricher.enrichHeader(header.get()));

            enriched(purityAdjuster, copyNumbers).forEach(refEnricher);
            refEnricher.flush();
        }
    }
}
 
Example #5
Source File: SortVcf.java    From picard with MIT License 5 votes vote down vote up
private void writeSortedOutput(final VCFHeader outputHeader, final SortingCollection<VariantContext> sortedOutput) {
    final ProgressLogger writeProgress = new ProgressLogger(log, 25000, "wrote", "records");
    final EnumSet<Options> options = CREATE_INDEX ? EnumSet.of(Options.INDEX_ON_THE_FLY) : EnumSet.noneOf(Options.class);
    final VariantContextWriter out = new VariantContextWriterBuilder().
            setReferenceDictionary(outputHeader.getSequenceDictionary()).
            setOptions(options).
            setOutputFile(OUTPUT).build();
    out.writeHeader(outputHeader);
    for (final VariantContext variantContext : sortedOutput) {
        out.add(variantContext);
        writeProgress.record(variantContext.getContig(), variantContext.getStart());
    }
    out.close();
}
 
Example #6
Source File: GenomicsDBImportIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static File createInputVCF(final String sampleName) {
    final String contig = "chr20";
    final SAMSequenceDictionary dict = new SAMSequenceDictionary(
            Collections.singletonList(new SAMSequenceRecord(contig, 64444167)));

    final VCFFormatHeaderLine formatField = new VCFFormatHeaderLine(SAMPLE_NAME_KEY, 1, VCFHeaderLineType.String,
                                                                    "the name of the sample this genotype came from");
    final Set<VCFHeaderLine> headerLines = new HashSet<>();
    headerLines.add(formatField);
    headerLines.add(new VCFFormatHeaderLine(ANOTHER_ATTRIBUTE_KEY, 1, VCFHeaderLineType.Integer, "Another value"));
    headerLines.add(VCFStandardHeaderLines.getFormatLine("GT"));

    final File out = createTempFile(sampleName +"_", ".vcf");
    try (final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(out.toPath(), dict, false,
                                                                                     Options.INDEX_ON_THE_FLY)) {
        final VCFHeader vcfHeader = new VCFHeader(headerLines, Collections.singleton(sampleName));
        vcfHeader.setSequenceDictionary(dict);
        writer.writeHeader(vcfHeader);
        final Allele Aref = Allele.create("A", true);
        final Allele C = Allele.create("C");
        final List<Allele> alleles = Arrays.asList(Aref, C);
        final VariantContext variant = new VariantContextBuilder("invented", contig, INTERVAL.get(0).getStart(), INTERVAL.get(0).getStart(), alleles)
                .genotypes(new GenotypeBuilder(sampleName, alleles).attribute(SAMPLE_NAME_KEY, sampleName)
                                   .attribute(ANOTHER_ATTRIBUTE_KEY, 10).make())
                .make();
        writer.add(variant);
        return out;
    }
}
 
Example #7
Source File: GATKVariantContextUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testOnTheFlyTabixCreation() throws IOException {
    final File inputGZIPFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/8_mutect2_sorted.vcf.gz");
    final File outputGZIPFile = createTempFile("testOnTheFlyTabixCreation", ".vcf.gz");

    long recordCount = 0;
    try (final VariantContextWriter vcfWriter = GATKVariantContextUtils.createVCFWriter(
             outputGZIPFile.toPath(),
             null,
             false,
             Options.INDEX_ON_THE_FLY);
        final FeatureReader<VariantContext> inputFileReader =
                 AbstractFeatureReader.getFeatureReader(inputGZIPFile.getAbsolutePath(), new VCFCodec(), false))
    {
        vcfWriter.writeHeader((VCFHeader)inputFileReader.getHeader());
        final Iterator<VariantContext> it = inputFileReader.iterator();
        while (it.hasNext()) {
            vcfWriter.add(it.next());
            recordCount++;
        }
    }

    // make sure we got a tabix index
    final File tabixIndexFile = new File(outputGZIPFile.getAbsolutePath() + FileExtensions.TABIX_INDEX);
    Assert.assertTrue(tabixIndexFile.exists());
    Assert.assertTrue(tabixIndexFile.length() > 0);

    // verify the index via query
    final SimpleInterval queryInterval = new SimpleInterval("chr6:33414233-118314029");
    try (final FeatureReader<VariantContext> outputFileReader = AbstractFeatureReader.getFeatureReader(
            outputGZIPFile.getAbsolutePath(),
            new VCFCodec(),
            true)) // require index
    {
        final long actualCount = outputFileReader.query(
                queryInterval.getContig(), queryInterval.getStart(), queryInterval.getEnd()).stream().count();
        Assert.assertEquals(actualCount, recordCount);
    }
}
 
Example #8
Source File: GATKVariantContextUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(expectedExceptions=IllegalArgumentException.class)
public void testCreateVariantContextWriterNoReference() {
    // should throw due to lack of reference
    final File outFile = createTempFile("testVCFWriter", ".vcf");
    final VariantContextWriter vcw =
                 GATKVariantContextUtils.createVCFWriter(
                         outFile.toPath(),
                         null,
                         true,
                         Options.INDEX_ON_THE_FLY);
    vcw.close();
}
 
Example #9
Source File: GATKVariantContextUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "createVCFWriterLenientData")
public void testCreateVCFWriterLenientTrue(
        final String outputExtension,
        final String indexExtension,
        final boolean createIndex,
        final boolean createMD5) throws IOException {

    final File tmpDir = createTempDir("createVCFTest");
    final File outputFile = new File(tmpDir.getAbsolutePath(), "createVCFTest" + outputExtension);

    Options options[] = createIndex ?
            new Options[] {Options.ALLOW_MISSING_FIELDS_IN_HEADER, Options.INDEX_ON_THE_FLY} :
            new Options[] {Options.ALLOW_MISSING_FIELDS_IN_HEADER};
    try (final VariantContextWriter vcw = GATKVariantContextUtils.createVCFWriter(
            outputFile.toPath(),
            makeSimpleSequenceDictionary(),
            createMD5,
            options)) {
        writeHeader(vcw);
        writeBadVariant(vcw);  // verify leniency by writing a bogus attribute
    }

    final File outFileIndex = new File(outputFile.getAbsolutePath() + indexExtension);
    final File outFileMD5 = new File(outputFile.getAbsolutePath() + ".md5");

    Assert.assertTrue(outputFile.exists(), "No output file was not created");
    Assert.assertEquals(outFileIndex.exists(), createIndex, "The createIndex argument was not honored");
    Assert.assertEquals(outFileMD5.exists(), createMD5, "The createMD5 argument was not honored");
}
 
Example #10
Source File: GATKVariantContextUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "createVCFWriterData")
public void testCreateVCFWriterWithOptions(
        final String outputExtension,
        final String indexExtension,
        final boolean createIndex,
        final boolean createMD5) throws IOException {

    final File tmpDir = createTempDir("createVCFTest");
    final File outputFile = new File(tmpDir.getAbsolutePath(), "createVCFTest" + outputExtension);

    Options options[] = createIndex ?
            new Options[] {Options.INDEX_ON_THE_FLY} :
            new Options[] {};
    try (final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(
            outputFile.toPath(),
            makeSimpleSequenceDictionary(),
            createMD5,
            options)) {
        writeHeader(writer);
    }

    final File outFileIndex = new File(outputFile.getAbsolutePath() + indexExtension);
    final File outFileMD5 = new File(outputFile.getAbsolutePath() + ".md5");

    Assert.assertTrue(outputFile.exists(), "No output file was not created");
    Assert.assertEquals(outFileIndex.exists(), createIndex, "The createIndex argument was not honored");
    Assert.assertEquals(outFileMD5.exists(), createMD5, "The createMD5 argument was not honored");

    verifyFileType(outputFile, outputExtension);
}
 
Example #11
Source File: SVVCFWriter.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static VariantContextWriter getVariantContextWriter(final OutputStream outputStream,
                                                            final SAMSequenceDictionary referenceSequenceDictionary) {
    VariantContextWriterBuilder vcWriterBuilder = new VariantContextWriterBuilder()
            .clearOptions()
            .setOutputStream(outputStream);

    if (null != referenceSequenceDictionary) {
        vcWriterBuilder = vcWriterBuilder.setReferenceDictionary(referenceSequenceDictionary);
    }
    for (final Options opt : new Options[]{}) {
        vcWriterBuilder = vcWriterBuilder.setOption(opt);
    }

    return vcWriterBuilder.build();
}
 
Example #12
Source File: HaplotypeCallerEngine.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Create a VCF or GVCF writer as appropriate, given our arguments
 *
 * @param outputVCF location to which the vcf should be written
 * @param readsDictionary sequence dictionary for the reads
 * @return a VCF or GVCF writer as appropriate, ready to use
 */
public VariantContextWriter makeVCFWriter( final String outputVCF, final SAMSequenceDictionary readsDictionary,
                                           final boolean createOutputVariantIndex, final boolean  createOutputVariantMD5,
                                           final boolean sitesOnlyMode ) {
    Utils.nonNull(outputVCF);
    Utils.nonNull(readsDictionary);

    final List<Options> options = new ArrayList<>(2);
    if (createOutputVariantIndex) {options.add(Options.INDEX_ON_THE_FLY);}
    if (sitesOnlyMode) {options.add(Options.DO_NOT_WRITE_GENOTYPES);}

    VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(
            IOUtils.getPath(outputVCF),
            readsDictionary,
            createOutputVariantMD5,
            options.toArray(new Options[options.size()])
    );

    if ( hcArgs.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
        try {
            writer = new GVCFWriter(writer, new ArrayList<Number>(hcArgs.GVCFGQBands), hcArgs.standardArgs.genotypeArgs.samplePloidy, hcArgs.floorBlocks);
        } catch ( IllegalArgumentException e ) {
            throw new CommandLineException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage());
        }
    }

    return writer;
}
 
Example #13
Source File: GATKVariantContextUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates a VariantContextWriter whose outputFile type is based on the extension of the output file name.
 * The default options set by VariantContextWriter are cleared before applying ALLOW_MISSING_FIELDS_IN_HEADER (if
 * <code>lenientProcessing</code> is set), followed by the set of options specified by any <code>options</code> args.
 *
 * @param outPath output Path for this writer. May not be null.
 * @param referenceDictionary required if on the fly indexing is set, otherwise can be null
 * @param createMD5 true if an md5 file should be created
 * @param options variable length list of additional Options to be set for this writer
 * @returns VariantContextWriter must be closed by the caller
 */
public static VariantContextWriter createVCFWriter(
        final Path outPath,
        final SAMSequenceDictionary referenceDictionary,
        final boolean createMD5,
        final Options... options)
{
    Utils.nonNull(outPath);

    VariantContextWriterBuilder vcWriterBuilder =
            new VariantContextWriterBuilder().clearOptions().setOutputPath(outPath);

    if (VariantContextWriterBuilder.OutputType.UNSPECIFIED == VariantContextWriterBuilder.determineOutputTypeFromFile(outPath)) {
        // the only way the user has to specify an output type is by file extension, and htsjdk
        // throws if it can't map the file extension to a known vcf type, so fallback to a default
        // of VCF
        logger.warn(String.format(
                "Can't determine output variant file format from output file extension \"%s\". Defaulting to VCF.",
                FilenameUtils.getExtension(outPath.getFileName().toString())));
        vcWriterBuilder = vcWriterBuilder.setOutputFileType(VariantContextWriterBuilder.OutputType.VCF);
    }

    if (createMD5) {
        vcWriterBuilder.setCreateMD5();
    }

    if (null != referenceDictionary) {
        vcWriterBuilder = vcWriterBuilder.setReferenceDictionary(referenceDictionary);
    }

    for (Options opt : options) {
        vcWriterBuilder = vcWriterBuilder.setOption(opt);
    }

    return vcWriterBuilder.build();
}
 
Example #14
Source File: GATKTool.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates a VariantContextWriter whose outputFile type is determined by
 * vcfOutput's extension, using the best available sequence dictionary for
 * this tool, and default index, leniency and md5 generation settings.
 *
 * @param outPath output Path for this writer. May not be null.
 * @returns VariantContextWriter must be closed by the caller
 */
public VariantContextWriter createVCFWriter(final Path outPath) {
    Utils.nonNull(outPath);

    final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();

    List<Options> options = new ArrayList<>();
    if (lenientVCFProcessing) {
        options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
    }

    if (createOutputVariantIndex) {
        if (null == sequenceDictionary) {
            logger.warn("A variant index will not be created - a sequence dictionary is required to create an output index");
            // fall through and create without an index
        } else {
            options.add(Options.INDEX_ON_THE_FLY);
        }
    }

    if (outputSitesOnlyVCFs) {
        options.add(Options.DO_NOT_WRITE_GENOTYPES);
    }

    return GATKVariantContextUtils.createVCFWriter(
            outPath,
            sequenceDictionary,
            createOutputVariantMD5,
            options.toArray(new Options[options.size()]));
}
 
Example #15
Source File: VcfTestUtils.java    From picard with MIT License 5 votes vote down vote up
/**
 * This method makes a copy of the input VCF and creates an index file for it in the same location.
 * This is done so that we don't need to store the index file in the same repo
 * The copy of the input is done so that it and its index are in the same directory which is typically required.
 *
 * @param vcfFile the vcf file to index
 * @return File a vcf file (index file is created in same path).
 */
public static File createTemporaryIndexedVcfFromInput(final File vcfFile, final String tempFilePrefix, final String suffix) throws IOException {
    final String extension;

    if (suffix != null) {
        extension = suffix;
    } else if (vcfFile.getAbsolutePath().endsWith(".vcf")) {
        extension = ".vcf";
    } else if (vcfFile.getAbsolutePath().endsWith(".vcf.gz")) {
        extension = ".vcf.gz";
    } else {
        extension = "";
    }

    if (!extension.equals(".vcf") && !extension.equals(".vcf.gz")) {
        throw new IllegalArgumentException("couldn't find a .vcf or .vcf.gz ending for input file " + vcfFile.getAbsolutePath());
    }

    File output = createTemporaryIndexedFile(tempFilePrefix, extension);

    try (final VCFFileReader in = new VCFFileReader(vcfFile, false)) {
        final VCFHeader header = in.getFileHeader();
        try (final VariantContextWriter out = new VariantContextWriterBuilder().
                setReferenceDictionary(header.getSequenceDictionary()).
                setOptions(EnumSet.of(Options.INDEX_ON_THE_FLY)).
                setOutputFile(output).build()) {
            out.writeHeader(header);
            for (final VariantContext ctx : in) {
                out.add(ctx);
            }
        }
    }
    return output;
}
 
Example #16
Source File: UpdateVcfSequenceDictionary.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath());

    final VCFFileReader fileReader = new VCFFileReader(INPUT, false);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(samSequenceDictionary)
            .clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);

    final VariantContextWriter vcfWriter = builder.setOutputFile(OUTPUT).build();
    fileHeader.setSequenceDictionary(samSequenceDictionary);
    vcfWriter.writeHeader(fileHeader);

    final ProgressLogger progress = new ProgressLogger(log, 10000);
    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        vcfWriter.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(fileReader);
    vcfWriter.close();

    return 0;
}
 
Example #17
Source File: RenameSampleInVcf.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final VCFFileReader in = new VCFFileReader(INPUT, false);
    final VCFHeader header = in.getFileHeader();

    if (header.getGenotypeSamples().size() > 1) {
        throw new IllegalArgumentException("Input VCF must be single-sample.");
    }

    if (OLD_SAMPLE_NAME != null && !OLD_SAMPLE_NAME.equals(header.getGenotypeSamples().get(0))) {
        throw new IllegalArgumentException("Input VCF did not contain expected sample. Contained: " + header.getGenotypeSamples().get(0));
    }

    final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
    if (CREATE_INDEX) options.add(Options.INDEX_ON_THE_FLY); else options.remove(Options.INDEX_ON_THE_FLY);

    final VCFHeader outHeader = new VCFHeader(header.getMetaDataInInputOrder(), CollectionUtil.makeList(NEW_SAMPLE_NAME));
    final VariantContextWriter out = new VariantContextWriterBuilder()
            .setOptions(options)
            .setOutputFile(OUTPUT).setReferenceDictionary(outHeader.getSequenceDictionary()).build();
    out.writeHeader(outHeader);

    for (final VariantContext ctx : in) {
        out.add(ctx);
    }

    out.close();
    in.close();

    return 0;
}
 
Example #18
Source File: VcfFormatConverter.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    final ProgressLogger progress = new ProgressLogger(LOG, 10000);
    
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final VCFFileReader reader = new VCFFileReader(INPUT, REQUIRE_INDEX);
    final VCFHeader header = new VCFHeader(reader.getFileHeader());
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available in the input file when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    final VariantContextWriter writer = builder.build();
    writer.writeHeader(header);
    final CloseableIterator<VariantContext> iterator = reader.iterator();

    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        writer.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(reader);
    writer.close();

    return 0;
}
 
Example #19
Source File: CreateSomaticPanelOfNormals.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public Object doWork() {
    final List<File> inputVcfs = new ArrayList<>(vcfs);
    final Collection<CloseableIterator<VariantContext>> iterators = new ArrayList<>(inputVcfs.size());
    final Collection<VCFHeader> headers = new HashSet<>(inputVcfs.size());
    final VCFHeader headerOfFirstVcf = new VCFFileReader(inputVcfs.get(0), false).getFileHeader();
    final SAMSequenceDictionary sequenceDictionary = headerOfFirstVcf.getSequenceDictionary();
    final VariantContextComparator comparator = headerOfFirstVcf.getVCFRecordComparator();


    for (final File vcf : inputVcfs) {
        final VCFFileReader reader = new VCFFileReader(vcf, false);
        iterators.add(reader.iterator());
        final VCFHeader header = reader.getFileHeader();
        Utils.validateArg(comparator.isCompatible(header.getContigLines()), () -> vcf.getAbsolutePath() + " has incompatible contigs.");
        headers.add(header);
    }

    final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(outputVcf, sequenceDictionary, false, Options.INDEX_ON_THE_FLY);
    writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false)));

    final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(comparator, iterators);
    SimpleInterval currentPosition = new SimpleInterval("FAKE", 1, 1);
    final List<VariantContext> variantsAtThisPosition = new ArrayList<>(20);
    while (mergingIterator.hasNext()) {
        final VariantContext vc = mergingIterator.next();
        if (!currentPosition.overlaps(vc)) {
            processVariantsAtSamePosition(variantsAtThisPosition, writer);
            variantsAtThisPosition.clear();
            currentPosition = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getStart());
        }
        variantsAtThisPosition.add(vc);
    }
    mergingIterator.close();
    writer.close();

    return "SUCCESS";
}
 
Example #20
Source File: ViccExtractorTestApplication.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
private static void writeHotspots(@NotNull String hotspotVcf, @NotNull Map<ViccEntry, ViccExtractionResult> resultsPerEntry) {
    VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(hotspotVcf)
            .setOutputFileType(VariantContextWriterBuilder.OutputType.VCF)
            .setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER)
            .modifyOption(Options.INDEX_ON_THE_FLY, false)
            .build();

    VCFHeader header = new VCFHeader(Sets.newHashSet(), Lists.newArrayList());
    writer.writeHeader(header);

    for (Map.Entry<VariantHotspot, HotspotAnnotation> entry : convertAndSort(resultsPerEntry).entrySet()) {
        VariantHotspot hotspot = entry.getKey();
        HotspotAnnotation annotation = entry.getValue();
        List<Allele> hotspotAlleles = buildAlleles(hotspot);

        VariantContext variantContext = new VariantContextBuilder().noGenotypes()
                .source("VICC")
                .chr(hotspot.chromosome())
                .start(hotspot.position())
                .alleles(hotspotAlleles)
                .computeEndFromAlleles(hotspotAlleles, (int) hotspot.position())
                .attribute("sources", annotation.sources())
                .attribute("feature",
                        ProteinKeyFormatter.toProteinKey(annotation.gene(), annotation.transcript(), annotation.proteinAnnotation()))
                .make();

        LOGGER.debug("Writing {}", variantContext);
        writer.add(variantContext);

    }
    writer.close();
}
 
Example #21
Source File: PonVCF.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
PonVCF(final String output, int sampleSize) {
    writer = new VariantContextWriterBuilder().setOutputFile(output)
            .modifyOption(Options.INDEX_ON_THE_FLY, false)
            .modifyOption(Options.USE_ASYNC_IO, false)
            .modifyOption(Options.DO_NOT_WRITE_GENOTYPES, true)
            .build();

    final VCFHeader header = new VCFHeader();
    header.addMetaDataLine(new VCFInfoHeaderLine(PON_COUNT, 1, VCFHeaderLineType.Integer, "how many samples had the variant"));
    header.addMetaDataLine(new VCFInfoHeaderLine(PON_TOTAL, 1, VCFHeaderLineType.Integer, "total depth"));
    header.addMetaDataLine(new VCFInfoHeaderLine(PON_MAX, 1, VCFHeaderLineType.Integer, "max depth"));
    header.addMetaDataLine(new VCFHeaderLine("PonInputSampleCount", String.valueOf(sampleSize)));
    writer.writeHeader(header);
}
 
Example #22
Source File: SageVCF.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
public SageVCF(@NotNull final IndexedFastaSequenceFile reference, @NotNull final SageConfig config) {
    writer = new VariantContextWriterBuilder().setOutputFile(config.outputFile())
            .modifyOption(Options.INDEX_ON_THE_FLY, true)
            .modifyOption(Options.USE_ASYNC_IO, false)
            .setReferenceDictionary(reference.getSequenceDictionary())
            .build();
    refContextEnrichment = new SomaticRefContextEnrichment(reference, this::writeToFile);

    final VCFHeader header = refContextEnrichment.enrichHeader(header(config));
    header.setSequenceDictionary(reference.getSequenceDictionary());
    writer.writeHeader(header);
}
 
Example #23
Source File: AmberVCF.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
void writeSNPCheck(@NotNull final String filename, @NotNull final List<BaseDepth> baseDepths) {
    final List<BaseDepth> list = Lists.newArrayList(baseDepths);
    Collections.sort(list);

    final VariantContextWriter writer =
            new VariantContextWriterBuilder().setOutputFile(filename).modifyOption(Options.INDEX_ON_THE_FLY, true).build();
    final VCFHeader header = header(Lists.newArrayList(config.primaryReference()));
    writer.setHeader(header);
    writer.writeHeader(header);

    list.forEach(x -> writer.add(create(x)));
    writer.close();
}
 
Example #24
Source File: AmberVCF.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
void writeContamination(@NotNull final String filename, @NotNull final Collection<TumorContamination> evidence) {
    final List<TumorContamination> list = Lists.newArrayList(evidence);
    Collections.sort(list);

    final VariantContextWriter writer =
            new VariantContextWriterBuilder().setOutputFile(filename).modifyOption(Options.INDEX_ON_THE_FLY, true).build();
    final VCFHeader header = header(Lists.newArrayList(config.primaryReference(), config.tumor()));
    writer.setHeader(header);
    writer.writeHeader(header);

    list.forEach(x -> writer.add(create(x)));
    writer.close();
}
 
Example #25
Source File: HotspotEvidenceVCF.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
public void write(@NotNull final String filename, @NotNull final List<HotspotEvidence> evidenceList) {
    final VariantContextWriter writer =
            new VariantContextWriterBuilder().setOutputFile(filename).modifyOption(Options.INDEX_ON_THE_FLY, false).build();
    writer.setHeader(header);
    writer.writeHeader(header);

    final ListMultimap<GenomePosition, HotspotEvidence> evidenceMap = Multimaps.index(evidenceList, GenomePositions::create);
    for (GenomePosition site : evidenceMap.keySet()) {
        final List<HotspotEvidence> evidence = evidenceMap.get(site);
        final VariantContext context = create(evidence);
        writer.add(context);
    }

    writer.close();
}
 
Example #26
Source File: SplitVcfs.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final ProgressLogger progress = new ProgressLogger(log, 10000);

    final VCFFileReader fileReader = new VCFFileReader(INPUT, false);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    final SAMSequenceDictionary sequenceDictionary =
            SEQUENCE_DICTIONARY != null
                    ? SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(SEQUENCE_DICTIONARY).getSequenceDictionary()
                    : fileHeader.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(sequenceDictionary)
            .clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);

    final VariantContextWriter snpWriter = builder.setOutputFile(SNP_OUTPUT).build();
    final VariantContextWriter indelWriter = builder.setOutputFile(INDEL_OUTPUT).build();
    snpWriter.writeHeader(fileHeader);
    indelWriter.writeHeader(fileHeader);

    int incorrectVariantCount = 0;

    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        if (context.isIndel()) indelWriter.add(context);
        else if (context.isSNP()) snpWriter.add(context);
        else {
            if (STRICT) throw new IllegalStateException("Found a record with type " + context.getType().name());
            else incorrectVariantCount++;
        }

        progress.record(context.getContig(), context.getStart());
    }

    if (incorrectVariantCount > 0) {
        log.debug("Found " + incorrectVariantCount + " records that didn't match SNP or INDEL");
    }

    CloserUtil.close(iterator);
    CloserUtil.close(fileReader);
    snpWriter.close();
    indelWriter.close();

    return 0;
}
 
Example #27
Source File: MergeVcfs.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    final ProgressLogger progress = new ProgressLogger(log, 10000);
    final List<String> sampleList = new ArrayList<String>();
    INPUT = IOUtil.unrollFiles(INPUT, IOUtil.VCF_EXTENSIONS);
    final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<CloseableIterator<VariantContext>>(INPUT.size());
    final Collection<VCFHeader> headers = new HashSet<VCFHeader>(INPUT.size());
    VariantContextComparator variantContextComparator = null;
    SAMSequenceDictionary sequenceDictionary = null;

    if (SEQUENCE_DICTIONARY != null) {
        sequenceDictionary = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SEQUENCE_DICTIONARY).getFileHeader().getSequenceDictionary();
    }

    for (final File file : INPUT) {
        IOUtil.assertFileIsReadable(file);
        final VCFFileReader fileReader = new VCFFileReader(file, false);
        final VCFHeader fileHeader = fileReader.getFileHeader();
        if (fileHeader.getContigLines().isEmpty()) {
            if (sequenceDictionary == null) {
                throw new IllegalArgumentException(SEQ_DICT_REQUIRED);
            } else {
                fileHeader.setSequenceDictionary(sequenceDictionary);
            }
        }

        if (variantContextComparator == null) {
            variantContextComparator = fileHeader.getVCFRecordComparator();
        } else {
            if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
                throw new IllegalArgumentException(
                        "The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
            }
        }

        if (sequenceDictionary == null) sequenceDictionary = fileHeader.getSequenceDictionary();

        if (sampleList.isEmpty()) {
            sampleList.addAll(fileHeader.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
            }
        }
        
        // add comments in the first header
        if (headers.isEmpty()) {
            COMMENT.stream().forEach(C -> fileHeader.addMetaDataLine(new VCFHeaderLine("MergeVcfs.comment", C)));
        }

        headers.add(fileHeader);
        iteratorCollection.add(fileReader.iterator());
    }

    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException(String.format("Index creation failed. %s", SEQ_DICT_REQUIRED));
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);

    if (CREATE_INDEX) {
        builder.setOption(Options.INDEX_ON_THE_FLY);
    } else {
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    }
    final VariantContextWriter writer = builder.build();

    writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));

    final MergingIterator<VariantContext> mergingIterator = new MergingIterator<VariantContext>(variantContextComparator, iteratorCollection);
    while (mergingIterator.hasNext()) {
        final VariantContext context = mergingIterator.next();
        writer.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(mergingIterator);
    writer.close();
    return 0;
}
 
Example #28
Source File: BCFRecordWriter.java    From Hadoop-BAM with MIT License 4 votes vote down vote up
protected VariantContextWriter createVariantContextWriter(Configuration conf,
		OutputStream out) {
	return new VariantContextWriterBuilder().clearOptions()
			.setOption(Options.FORCE_BCF)
			.setOutputBCFStream(out).build();
}
 
Example #29
Source File: MakeSitesOnlyVcf.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final VCFFileReader reader = new VCFFileReader(INPUT, false);
    final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
    final SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary();

    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }

    final ProgressLogger progress = new ProgressLogger(Log.getInstance(MakeSitesOnlyVcf.class), 10000);

    // Setup the site-only file writer
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    final VariantContextWriter writer = builder.build();

    final VCFHeader header = new VCFHeader(inputVcfHeader.getMetaDataInInputOrder(), SAMPLE);
    writer.writeHeader(header);

    // Go through the input, strip the records and write them to the output
    final CloseableIterator<VariantContext> iterator = reader.iterator();
    while (iterator.hasNext()) {
        final VariantContext full = iterator.next();
        final VariantContext site = subsetToSamplesWithOriginalAnnotations(full, SAMPLE);
        writer.add(site);
        progress.record(site.getContig(), site.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(reader);
    writer.close();

    return 0;
}
 
Example #30
Source File: CalculateGenotypePosteriors.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Override
public void onTraversalStart() {
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(out).setOutputFileType(VariantContextWriterBuilder.OutputType.VCF);
    if (hasReference()){
        vcfWriter = builder.setReferenceDictionary(getBestAvailableSequenceDictionary()).setOption(Options.INDEX_ON_THE_FLY).build();
    } else {
        vcfWriter = builder.unsetOption(Options.INDEX_ON_THE_FLY).build();
        logger.info("Can't make an index for output file " + out + " because a reference dictionary is required for creating Tribble indices on the fly");
    }

    sampleDB = initializeSampleDB();

    // Get list of samples to include in the output
    final Map<String, VCFHeader> vcfHeaders = Collections.singletonMap(getDrivingVariantsFeatureInput().getName(), getHeaderForVariants());
    final Set<String> vcfSamples = VcfUtils.getSortedSampleSet(vcfHeaders, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);

    //Get the trios from the families passed as ped
    if (!skipFamilyPriors){
        final Set<Trio> trios = sampleDB.getTrios();
        if(trios.isEmpty()) {
            logger.info("No PED file passed or no *non-skipped* trios found in PED file. Skipping family priors.");
            skipFamilyPriors = true;
        }
    }

    final VCFHeader header = vcfHeaders.values().iterator().next();
    if ( ! header.hasGenotypingData() ) {
        throw new UserException("VCF has no genotypes");
    }

    if ( header.hasInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY) ) {
        final VCFInfoHeaderLine mleLine = header.getInfoHeaderLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY);
        if ( mleLine.getCountType() != VCFHeaderLineCount.A ) {
            throw new UserException("VCF does not have a properly formatted MLEAC field: the count type should be \"A\"");
        }

        if ( mleLine.getType() != VCFHeaderLineType.Integer ) {
            throw new UserException("VCF does not have a properly formatted MLEAC field: the field type should be \"Integer\"");
        }
    }

    // Initialize VCF header
    final Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfHeaders.values(), true);
    headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.GENOTYPE_PRIOR_KEY));
    if (!skipFamilyPriors) {
        headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.JOINT_LIKELIHOOD_TAG_NAME));
        headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.JOINT_POSTERIOR_TAG_NAME));
    }
    headerLines.addAll(getDefaultToolVCFHeaderLines());

    vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples));

    final Map<String,Set<Sample>> families = sampleDB.getFamilies(vcfSamples);
    famUtils = new FamilyLikelihoods(sampleDB, deNovoPrior, vcfSamples, families);
}