htsjdk.variant.vcf.VCFCodec Java Examples

The following examples show how to use htsjdk.variant.vcf.VCFCodec. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: VariantHotspotFile.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
@NotNull
public static ListMultimap<Chromosome, VariantHotspot> readFromVCF(@NotNull final String fileName) throws IOException {
    ListMultimap<Chromosome, VariantHotspot> result = ArrayListMultimap.create();

    try (final AbstractFeatureReader<VariantContext, LineIterator> reader = AbstractFeatureReader.getFeatureReader(fileName,
            new VCFCodec(),
            false)) {
        for (VariantContext variantContext : reader.iterator()) {
            if (HumanChromosome.contains(variantContext.getContig())) {
                result.put(HumanChromosome.fromString(variantContext.getContig()), fromVariantContext(variantContext));
            }
        }
    }

    return result;
}
 
Example #2
Source File: GenomicsDBImportIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testPreserveContigOrderingInHeader() throws IOException {
    final String workspace = createTempDir("testPreserveContigOrderingInHeader-").getAbsolutePath() + "/workspace";
    ArrayList<SimpleInterval> intervals = new ArrayList<SimpleInterval>(Arrays.asList(new SimpleInterval("chr20", 17959479, 17959479)));
    writeToGenomicsDB(Arrays.asList(GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf",
            GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting2.g.vcf"), intervals, workspace, 0, false, 0, 1);

    try ( final FeatureReader<VariantContext> genomicsDBFeatureReader =
                  getGenomicsDBFeatureReader(workspace, b38_reference_20_21);

         final AbstractFeatureReader<VariantContext, LineIterator> inputGVCFReader =
                  AbstractFeatureReader.getFeatureReader(GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf", new VCFCodec(), true);
    ) {
        final SAMSequenceDictionary dictionaryFromGenomicsDB = ((VCFHeader)genomicsDBFeatureReader.getHeader()).getSequenceDictionary();
        final SAMSequenceDictionary dictionaryFromInputGVCF =  ((VCFHeader)inputGVCFReader.getHeader()).getSequenceDictionary();

        Assert.assertEquals(dictionaryFromGenomicsDB, dictionaryFromInputGVCF, "Sequence dictionary from GenomicsDB does not match original sequence dictionary from input GVCF");
    }

}
 
Example #3
Source File: GatherVcfsCloudIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@SuppressWarnings({"unchecked"})
private void assertGatherProducesCorrectVariants(GatherVcfsCloud.GatherType gatherType, File expected, List<File> inputs) throws IOException {
    final File output = createTempFile("gathered", ".vcf.gz");
    final ArgumentsBuilder args = new ArgumentsBuilder();
    inputs.forEach(args::addInput);
    args.addOutput(output)
            .add(GatherVcfsCloud.GATHER_TYPE_LONG_NAME, gatherType.toString());
    runCommandLine(args);

    try (final AbstractFeatureReader<VariantContext, ?> actualReader = AbstractFeatureReader.getFeatureReader(output.getAbsolutePath(), null, new VCFCodec(), false, Function.identity(), Function.identity());
         final AbstractFeatureReader<VariantContext, ?> expectedReader = AbstractFeatureReader.getFeatureReader(expected.getAbsolutePath(), null, new VCFCodec(), false, Function.identity(), Function.identity())) {

        final List<VariantContext> actualVariants = StreamSupport.stream(Spliterators.spliteratorUnknownSize(actualReader.iterator(),Spliterator.ORDERED), false).collect(Collectors.toList());
        final List<VariantContext> expectedVariants = StreamSupport.stream(Spliterators.spliteratorUnknownSize(expectedReader.iterator(),Spliterator.ORDERED), false).collect(Collectors.toList());
        VariantContextTestUtils.assertEqualVariants(actualVariants, expectedVariants);

        Assert.assertEquals(((VCFHeader) actualReader.getHeader()).getMetaDataInInputOrder(),
                ((VCFHeader) expectedReader.getHeader()).getMetaDataInInputOrder());
    }
}
 
Example #4
Source File: AmberSiteFactory.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
@NotNull
public static ListMultimap<Chromosome, AmberSite> sites(@NotNull final String vcfFile) throws IOException {
    final ListMultimap<Chromosome, AmberSite> result = ArrayListMultimap.create();

    try (final AbstractFeatureReader<VariantContext, LineIterator> reader = getFeatureReader(vcfFile, new VCFCodec(), false)) {
        for (VariantContext variant : reader.iterator()) {
            if (variant.isNotFiltered()) {
                if (HumanChromosome.contains(variant.getContig())) {
                    HumanChromosome chromosome = HumanChromosome.fromString(variant.getContig());
                    result.put(chromosome,
                            ImmutableAmberSite.builder()
                                    .chromosome(variant.getContig())
                                    .position(variant.getStart())
                                    .ref(variant.getReference().getBaseString())
                                    .alt(variant.getAlternateAllele(0).getBaseString())
                                    .snpCheck(variant.hasAttribute("SNPCHECK"))
                                    .build());
                }
            }
        }
    }

    return result;
}
 
Example #5
Source File: FeatureDataSource.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
protected static FeatureReader<VariantContext> getGenomicsDBFeatureReader(final GATKPath path, final File reference, final GenomicsDBOptions genomicsDBOptions) {
    final String workspace = IOUtils.getGenomicsDBAbsolutePath(path) ;
    if (workspace == null) {
        throw new IllegalArgumentException("Trying to create a GenomicsDBReader from  non-GenomicsDB input path " + path);
    } else if (Files.notExists(IOUtils.getPath(workspace.endsWith("/") ? workspace : workspace + "/"))) {
        throw new UserException("GenomicsDB workspace " + path + " does not exist");
    }

    final String callsetJson = IOUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME);
    final String vidmapJson = IOUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME);
    final String vcfHeader = IOUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME);

    IOUtils.assertPathsAreReadable(callsetJson, vidmapJson, vcfHeader);

    try {
        final GenomicsDBExportConfiguration.ExportConfiguration exportConfigurationBuilder =
                createExportConfiguration(workspace, callsetJson, vidmapJson, vcfHeader, genomicsDBOptions);
        if (genomicsDBOptions.useBCFCodec()) {
            return new GenomicsDBFeatureReader<>(exportConfigurationBuilder, new BCF2Codec(), Optional.empty());
        } else {
            return new GenomicsDBFeatureReader<>(exportConfigurationBuilder, new VCFCodec(), Optional.empty());
        }
    } catch (final IOException e) {
        throw new UserException("Couldn't create GenomicsDBFeatureReader", e);
    }
}
 
Example #6
Source File: GenotypeGVCFsIntegrationTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Returns a list of VariantContext records from a VCF file
 *
 * @param vcfFile VCF file
 * @return list of VariantContext records
 * @throws IOException if the file does not exist or can not be opened
 */
private static List<VariantContext> getVariantContexts(final File vcfFile) throws IOException {
    final VCFCodec codec = new VCFCodec();
    final FileInputStream s = new FileInputStream(vcfFile);
    final LineIterator lineIteratorVCF = codec.makeSourceFromStream(new PositionalBufferedStream(s));
    codec.readHeader(lineIteratorVCF);

    final List<VariantContext> VCs = new ArrayList<>();
    while (lineIteratorVCF.hasNext()) {
        final String line = lineIteratorVCF.next();
        Assert.assertFalse(line == null);
        VCs.add(codec.decode(line));
    }

    return VCs;
}
 
Example #7
Source File: GenomicsDBImportIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static FeatureReader<VariantContext> getGenomicsDBFeatureReader(
        final String workspace, final String reference,
        final boolean produceGTField,
        final boolean sitesOnlyQuery,
        final boolean useVCFCodec) throws IOException {
   String workspaceAbsPath = BucketUtils.makeFilePathAbsolute(workspace);
   GenomicsDBExportConfiguration.ExportConfiguration.Builder exportConfigurationBuilder = GenomicsDBExportConfiguration.ExportConfiguration.newBuilder()
            .setWorkspace(workspace)
            .setReferenceGenome(reference)
            .setVidMappingFile(IOUtils.appendPathToDir(workspaceAbsPath, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME))
            .setCallsetMappingFile(IOUtils.appendPathToDir(workspaceAbsPath, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME))
            .setVcfHeaderFilename(IOUtils.appendPathToDir(workspaceAbsPath, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME))
            .setProduceGTField(produceGTField)
            .setSitesOnlyQuery(sitesOnlyQuery)
           .setGenerateArrayNameFromPartitionBounds(true);
    GenomicsDBVidMapProto.VidMappingPB vidMapPB = null;
    try {
        vidMapPB = org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBUtils.getProtobufVidMappingFromJsonFile(IOUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME));
    }
    catch (final IOException e) {
        throw new UserException("Could not open vid json file "+GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME, e);
    }
    HashMap<String, Integer> fieldNameToIndexInVidFieldsList =
            org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBUtils.getFieldNameToListIndexInProtobufVidMappingObject(vidMapPB);

    vidMapPB = org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBUtils.updateINFOFieldCombineOperation(vidMapPB, fieldNameToIndexInVidFieldsList,
            GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY, "element_wise_sum");

    if(vidMapPB != null) {
        exportConfigurationBuilder.setVidMapping(vidMapPB);
    }

    if (useVCFCodec) {
        return new GenomicsDBFeatureReader<>(exportConfigurationBuilder.build(), new VCFCodec(), Optional.empty());
    } else {
        return new GenomicsDBFeatureReader<>(exportConfigurationBuilder.build(), new BCF2Codec(), Optional.empty());
    }
}
 
Example #8
Source File: VariantContextCollectionTest.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@NotNull
private static VCFCodec createTestCodec() {
    VCFCodec codec = new VCFCodec();
    VCFHeader header = new VCFHeader(Sets.newHashSet(), Sets.newHashSet(SAMPLE));
    codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2);
    return codec;
}
 
Example #9
Source File: FeatureManagerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@DataProvider(name = "DetectCorrectFileFormatTestData")
public Object[][] getDetectCorrectFileFormatTestData() {
    return new Object[][] {
            { new File(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_vcf4_file.vcf"), VCFCodec.class },
            { new File(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_vcf3_file.vcf"), VCF3Codec.class },
            { new File(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_bcf_file.bcf"), BCF2Codec.class },
            { new File(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_bed_file.bed"), BEDCodec.class},
            { new File(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_table_file.table"), TableCodec.class}
    };
}
 
Example #10
Source File: MakeVcfSampleNameMap.java    From picard with MIT License 5 votes vote down vote up
private static AbstractFeatureReader<VariantContext, LineIterator> getReaderFromPath(final Path variantPath) {
    final String variantURI = variantPath.toAbsolutePath().toUri().toString();
    try {
        return AbstractFeatureReader.getFeatureReader(variantURI, null, new VCFCodec(),
                false, Function.identity(), Function.identity());
    } catch (final TribbleException e) {
        throw new PicardException("Failed to create reader from " + variantURI, e);
    }
}
 
Example #11
Source File: GermlineVcfReader.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
private void processVcf(final String vcfFile)
{
    try
    {
        LOGGER.info("processing germline VCF({})", vcfFile);

        mSampleGermlineSVs.clear();
        mSvFactory = new StructuralVariantFactory(new AlwaysPassFilter());

        final AbstractFeatureReader<VariantContext, LineIterator> reader = AbstractFeatureReader.getFeatureReader(
                vcfFile, new VCFCodec(), false);

        reader.iterator().forEach(x -> processVariant(x));

        if(mSampleGermlineSVs.isEmpty())
            return;

        if (mConfig.LinkByAssembly)
            annotateAssembledLinks(mSvAssemblyData);

        if(mConfig.CheckDisruptions)
        {
            final String sampleId = mSampleGermlineSVs.get(0).SampleId;
            mGeneImpact.findDisruptiveVariants(sampleId, mSampleGermlineSVs);
        }

        writeSVs();
    }
    catch(IOException e)
    {
        LOGGER.error("error reading vcf({}): {}", vcfFile, e.toString());
    }
}
 
Example #12
Source File: SageHotspotAnnotationTest.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@NotNull
private static VCFCodec createTestCodec() {
    VCFCodec codec = new VCFCodec();
    VCFHeader header = new VCFHeader(Sets.newHashSet(), Sets.newHashSet(SAMPLE));
    codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2);
    return codec;
}
 
Example #13
Source File: RecoveredVariantFactoryTest.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@NotNull
private static VCFCodec createTestCodec() {
    VCFCodec codec = new VCFCodec();
    VCFHeader header = new VCFHeader(Sets.newHashSet(), Sets.newHashSet(SAMPLE));
    codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2);
    return codec;
}
 
Example #14
Source File: StrelkaAllelicDepthTest.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@NotNull
private static VCFCodec createTestCodec() {
    VCFCodec codec = new VCFCodec();
    VCFHeader header = new VCFHeader(Sets.newHashSet(), Lists.newArrayList(NORMAL, TUMOR));
    codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2);
    return codec;
}
 
Example #15
Source File: StructuralVariantFactoryTest.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@NotNull
private static VCFCodec createTestCodec() {
    VCFCodec codec = new VCFCodec();
    VCFHeader header = new VCFHeader(Sets.newHashSet(), Sets.newHashSet(SAMPLE));
    codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2);
    return codec;
}
 
Example #16
Source File: VariantContextFromString.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@NotNull
private static VCFCodec createTestCodec(@NotNull String sample) {
    VCFCodec codec = new VCFCodec();
    VCFHeader header = new VCFHeader(Sets.newHashSet(), Sets.newHashSet(sample));
    codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2);
    return codec;
}
 
Example #17
Source File: SomaticVariantFactoryTest.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@NotNull
private static VCFCodec createTestCodec() {
    VCFCodec codec = new VCFCodec();
    VCFHeader header = new VCFHeader(Sets.newHashSet(), Sets.newHashSet(SAMPLE));
    codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2);
    return codec;
}
 
Example #18
Source File: SomaticVariantFactory.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
public void fromVCFFile(@NotNull final String tumor, @Nullable final String reference, @Nullable final String rna,
        @NotNull final String vcfFile, Consumer<SomaticVariant> consumer) throws IOException {
    final List<VariantContext> variants = Lists.newArrayList();

    try (final AbstractFeatureReader<VariantContext, LineIterator> reader = getFeatureReader(vcfFile, new VCFCodec(), false)) {
        final VCFHeader header = (VCFHeader) reader.getHeader();
        if (!sampleInFile(tumor, header)) {
            throw new IllegalArgumentException("Sample " + tumor + " not found in vcf file " + vcfFile);
        }

        if (reference != null && !sampleInFile(reference, header)) {
            throw new IllegalArgumentException("Sample " + reference + " not found in vcf file " + vcfFile);
        }

        if (rna != null && !sampleInFile(rna, header)) {
            throw new IllegalArgumentException("Sample " + rna + " not found in vcf file " + vcfFile);
        }

        if (!header.hasFormatLine("AD")) {
            throw new IllegalArgumentException("Allelic depths is a required format field in vcf file " + vcfFile);
        }

        for (VariantContext variant : reader.iterator()) {
            if (filter.test(variant)) {
                createVariant(tumor, reference, rna, variant).ifPresent(consumer);
            }
        }
    }
}
 
Example #19
Source File: StructuralVariantFileLoader.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@NotNull
public static List<StructuralVariant> fromFile(@NotNull String vcfFileLocation, @NotNull VariantContextFilter filter) throws IOException {
    final StructuralVariantFactory factory = new StructuralVariantFactory(filter);

    try (final AbstractFeatureReader<VariantContext, LineIterator> reader = AbstractFeatureReader.getFeatureReader(vcfFileLocation,
            new VCFCodec(), false)) {
        reader.iterator().forEach(factory::addVariantContext);
    }

    return factory.results();
}
 
Example #20
Source File: GatherVcfsCloud.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private static FeatureReader<VariantContext> getReaderFromVCFUri(final Path variantPath, final int cloudPrefetchBuffer) {
    final String variantURI = variantPath.toUri().toString();
    return AbstractFeatureReader.getFeatureReader(variantURI, null, new VCFCodec(), false,
            BucketUtils.getPrefetchingWrapper(cloudPrefetchBuffer),
            Function.identity());
}
 
Example #21
Source File: FeatureInputUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Test
public void testFeatureCodecCache() {
    Assert.assertEquals(getVariantFeatureInputWithCachedCodec().getFeatureCodecClass(), VCFCodec.class);
}
 
Example #22
Source File: RecoveredVariantFactory.java    From hmftools with GNU General Public License v3.0 4 votes vote down vote up
RecoveredVariantFactory(@NotNull final PurityAdjuster purityAdjuster, @NotNull final String recoveryVCF) {
    reader = getFeatureReader(recoveryVCF, new VCFCodec(), true);
    ploidyFactory = new StructuralVariantLegPloidyFactory<>(purityAdjuster, PurpleCopyNumber::averageTumorCopyNumber);
}
 
Example #23
Source File: ApplyVQSRIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Test
public void testApplyRecalibrationAlleleSpecificINDELmodeGZIPIndex() throws IOException {
    final File tempGZIPOut = createTempFile("testApplyRecalibrationAlleleSpecificINDELmodeGZIP", ".vcf.gz");
    final File expectedFile = new File(getToolTestDataDir(), "expected/applyIndelAlleleSpecificResult.vcf");
    final SimpleInterval queryInterval = new SimpleInterval("3:113005755-195507036");

    // The input file is the same file as used in testApplyRecalibrationAlleleSpecificINDELmode, except that the
    // hg38 sequence dictionary has been transplanted into the header (the header itself has been modified so that
    // contig "chr3" is renamed to "3" to match the variants in this file), and the file is a .gz.
    final String base =
            " -L " +  queryInterval.toString() +
                    " -mode INDEL -AS" +
                    " -ts-filter-level 99.3" +
                    " --variant " + getToolTestDataDir() + "VQSR.AStest.postSNPinput.HACKEDhg38header.vcf.gz" +
                    " --output " + tempGZIPOut.getAbsolutePath() +
                    " --tranches-file " + getToolTestDataDir() + "VQSR.AStest.indels.tranches" +
                    " --recal-file " + getToolTestDataDir() + "VQSR.AStest.indels.recal.vcf" +
                    " --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE +" false";

    final IntegrationTestSpec spec = new IntegrationTestSpec(base, Collections.emptyList());
    spec.executeTest("testApplyRecalibrationAlleleSpecificINDELmodeGZIP", this);

    // make sure we got a tabix index
    final File tabixIndexFile = new File(tempGZIPOut.getAbsolutePath() + FileExtensions.TABIX_INDEX);
    Assert.assertTrue(tabixIndexFile.exists());
    Assert.assertTrue(tabixIndexFile.length() > 0);

    // Now verify that the resulting index is valid by querying the same interval used above to run ApplyVQSR.
    // This should return all the variants in the file. Compare that with the number of records returned by (a
    // non-query) iterator that return all variants in the expected results file.
    try (final FeatureReader<VariantContext> expectedFileReader =
                 AbstractFeatureReader.getFeatureReader(expectedFile.getAbsolutePath(), new VCFCodec(), false);
            final FeatureReader<VariantContext> outputFileReader =
                 AbstractFeatureReader.getFeatureReader(tempGZIPOut.getAbsolutePath(), new VCFCodec())) {
        // results from the query should match the expected file results
        final long actualCount = outputFileReader.query(
                queryInterval.getContig(), queryInterval.getStart(), queryInterval.getEnd()).stream().count();
        final long expectedCount = expectedFileReader.iterator().stream().count();
        Assert.assertEquals(actualCount, expectedCount);
    }
}