Java Code Examples for htsjdk.variant.vcf.VCFFileReader

The following examples show how to use htsjdk.variant.vcf.VCFFileReader. They are extracted from open source projects. You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to visit the original project or source file. You may also check out the related API usage in the sidebar.
Example 1
// Parses the command line, opens the input VCF and prepares the tabix-indexed
// output writer. The fields assigned here (inputVCF, outputVCF, vcfReader,
// header, vcfWriter) are declared elsewhere in the class.
private AnnotateStrelkaWithAllelicDepth(final Options options, final String... args) throws ParseException {
    final CommandLine cmd = createCommandLine(args, options);
    // Both input and output VCF paths are required arguments.
    if (!cmd.hasOption(VCF_IN)) {
        throw new ParseException(VCF_IN + " is a mandatory argument");
    }

    if (!cmd.hasOption(VCF_OUT)) {
        throw new ParseException(VCF_OUT + " is a mandatory argument");
    }

    inputVCF = cmd.getOptionValue(VCF_IN);
    outputVCF = cmd.getOptionValue(VCF_OUT);

    // Second argument false: do not require an index for the input VCF.
    vcfReader = new VCFFileReader(new File(inputVCF), false);
    header = generateOutputHeader(vcfReader.getFileHeader());
    // Writer creates a tabix index alongside the output and tolerates
    // attributes that are absent from the header.
    vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVCF)
            .setReferenceDictionary(header.getSequenceDictionary())
            .setIndexCreator(new TabixIndexCreator(header.getSequenceDictionary(), new TabixFormat()))
            .setOption(htsjdk.variant.variantcontext.writer.Options.ALLOW_MISSING_FIELDS_IN_HEADER)
            .build();
}
 
Example 2
// Asserts that the variants in a generated VCF match those in an expected VCF,
// ignoring the given attributes during comparison.
private static void vcfEquivalenceTest(final String generatedVCFPath, final String expectedVCFPath,
                                       final List<String> attributesToIgnore, final boolean onHDFS) throws Exception {

    // Read all expected variants up front; reader and iterator are both
    // released by a single try-with-resources.
    final List<VariantContext> expectedVariants;
    try (final VCFFileReader reader = new VCFFileReader(new File(expectedVCFPath), false);
         final CloseableIterator<VariantContext> it = reader.iterator()) {
        expectedVariants = Utils.stream(it).collect(Collectors.toList());
    }

    // The generated VCF may live on HDFS; delegate extraction accordingly.
    final List<VariantContext> actualVariants = StructuralVariationDiscoveryPipelineSparkIntegrationTest
            .extractActualVCs(generatedVCFPath, onHDFS);

    // Pairwise comparison, ignoring the caller-specified attributes.
    GATKBaseTest.assertCondition(actualVariants, expectedVariants,
            (actual, expected) -> VariantContextTestUtils.assertVariantContextsAreEqual(actual, expected, attributesToIgnore, Collections.emptyList()));
}
 
Example 3
Source Project: picard   Source File: TestFilterVcf.java    License: MIT License 6 votes vote down vote up
@Test(dataProvider = "goodInputVcfs")
public void testJavaScript(final File input) throws Exception {
    final File out = VcfTestUtils.createTemporaryIndexedFile("filterVcfTestJS.", ".vcf");
    final FilterVcf filterer = new FilterVcf();
    filterer.INPUT = input;
    filterer.OUTPUT = out;
    // Filter out variants whose start position is a multiple of 5.
    filterer.JAVASCRIPT_FILE = quickJavascriptFilter("variant.getStart()%5 != 0");

    final int retval = filterer.doWork();
    Assert.assertEquals(retval, 0);

    // Count the FILTERED variant records in the output (the original comment
    // said "reads", but this loop counts filtered VariantContexts).
    final int expectedNumber = 4;
    int count = 0;
    // try-with-resources: the original leaked the reader and iterator if an
    // assertion failed before the manual close() calls.
    try (final VCFFileReader in = new VCFFileReader(filterer.OUTPUT, false);
         final CloseableIterator<VariantContext> iter = in.iterator()) {
        while (iter.hasNext()) {
            final VariantContext ctx = iter.next();
            count += (ctx.isFiltered() ? 1 : 0);
        }
    }
    Assert.assertEquals(count, expectedNumber);
}
 
Example 4
@Test
public void testQueryAllHG38Intervals() {
    SAMSequenceDictionary sd;
    final File testFile = new File (FEATURE_DATA_SOURCE_TEST_DIRECTORY, "Homo_sapiens_assembly38.headerOnly.vcf.gz");

    // Pull the hg38 sequence dictionary out of the header-only test VCF.
    try (VCFFileReader vcfReader = new VCFFileReader(testFile, false)) {
        sd = vcfReader.getFileHeader().getSequenceDictionary();
    }

    // Test that we can execute a query using any hg38 contig name against a VCF with an hg38 sequence dictionary.
    // Since the query is provided as a SimpleInterval, no interval query parsing or disambiguation is executed by
    // this code path, but GenomeLocParserUnitTest and IntervalUtilsUnitTest have corresponding tests that ensure
    // that no hg38 query can be ambiguous.
    try (final FeatureDataSource<VariantContext> featureSource = new FeatureDataSource<>(testFile)) {
        sd.getSequences().stream().forEach(
                hg38Contig -> featureSource.query(new SimpleInterval(hg38Contig.getSequenceName(), 1, hg38Contig.getSequenceLength())));
    }
}
 
Example 5
// Builds a (variant, reference context, overlapping VCF features) triple for
// cache tests, anchored on a position in the ExAC snippet test file.
private Triple<VariantContext, ReferenceContext, List<Feature>> createDummyCacheTriples(final List<String> alleles, final int offset) {

    // Interval for a variant that overlaps an entry in the ExAC snippet test file when offset == 0.
    final SimpleInterval variantInterval = new SimpleInterval("3", 13372 + offset, 13372 + offset);
    final VariantContext vc = new VariantContextBuilder()
            .chr(variantInterval.getContig()).start(variantInterval.getStart()).stop(variantInterval.getEnd())
            .alleles(alleles)
            .make();
    final ReferenceContext referenceContext = new ReferenceContext(ReferenceDataSource.of(Paths.get(FuncotatorReferenceTestUtils.retrieveB37Chr3Ref())), variantInterval);
    final List<Feature> vcfFeatures;
    // VCFFileReader.query returns a CloseableIterator; close it as well as the
    // reader (the original leaked the iterator).
    try (final VCFFileReader vcfReader = new VCFFileReader(IOUtils.getPath(EXAC_SNIPPET));
         final CloseableIterator<VariantContext> queryResults =
                 vcfReader.query(variantInterval.getContig(), variantInterval.getStart(), variantInterval.getEnd())) {
        vcfFeatures = queryResults.stream().collect(Collectors.toList());
    }

    return Triple.of(vc, referenceContext, vcfFeatures);
}
 
Example 6
Source Project: picard   Source File: AbstractVcfMergingClpTester.java    License: MIT License 6 votes vote down vote up
/**
 * Make sure that the order of the output file is identical to the order
 * of the input files by iterating through the output, making sure that,
 * if the context is an indel (snp), the next genomic position in the indel
 * (snp) queue is the same. Also make sure that the context is in the order
 * specified by the input files.
 */
private void validateSnpAndIndelResults(final File output, final Queue<String> indelContigPositions, final Queue<String> snpContigPositions) {
    // try-with-resources: the original never closed the reader at all, and the
    // iterator leaked if an assertion failed part-way through.
    try (final VCFFileReader outputReader = new VCFFileReader(output, false);
         final CloseableIterator<VariantContext> iterator = outputReader.iterator()) {
        final VariantContextComparator outputComparator = outputReader.getFileHeader().getVCFRecordComparator();
        VariantContext last = null;
        while (iterator.hasNext()) {
            final VariantContext outputContext = iterator.next();
            // Indels and SNPs must appear in the same order as the input queues.
            if (outputContext.isIndel()) Assert.assertEquals(getContigPosition(outputContext), indelContigPositions.poll());
            if (outputContext.isSNP()) Assert.assertEquals(getContigPosition(outputContext), snpContigPositions.poll());
            // Records must be non-decreasing under the header's record comparator.
            if (last != null) Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0);
            last = outputContext;
        }
    }

    // We should have polled everything off the indel (snp) queues
    Assert.assertEquals(indelContigPositions.size(), 0);
    Assert.assertEquals(snpContigPositions.size(), 0);
}
 
Example 7
// Reads variants from filePath, merges potential MNV regions and writes the
// result to outputVcf, validating merges against the tumor BAM.
private static void processVariants(boolean strelka, @NotNull final String filePath, @NotNull final String outputVcf,
        @NotNull final String tumorBam) {
    // Close reader and writer via try-with-resources; the original leaked both
    // if an exception was thrown mid-processing.
    try (final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false)) {
        final VCFHeader outputHeader = generateOutputHeader(vcfReader.getFileHeader(), "TUMOR");
        try (final VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVcf)
                .setReferenceDictionary(vcfReader.getFileHeader().getSequenceDictionary())
                .build()) {
            vcfWriter.writeHeader(outputHeader);
            final MNVValidator validator = ImmutableMNVValidator.of(tumorBam);
            final MNVMerger merger = ImmutableMNVMerger.of(outputHeader);
            // Sliding pair of (current potential MNV region, completed region ready to flush).
            Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair = ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
            for (final VariantContext rawVariant : vcfReader) {
                final VariantContext simplifiedVariant =
                        strelka ? StrelkaPostProcess.simplifyVariant(rawVariant, StrelkaPostProcess.TUMOR_GENOTYPE) : rawVariant;

                final PotentialMNVRegion potentialMNV = outputPair.getLeft();
                outputPair = MNVDetector.addMnvToRegion(potentialMNV, simplifiedVariant);
                // A completed region is validated, merged and written out.
                outputPair.getRight().ifPresent(mnvRegion -> validator.mergeVariants(mnvRegion, merger).forEach(vcfWriter::add));
            }
            // Flush the final in-progress region.
            validator.mergeVariants(outputPair.getLeft(), merger).forEach(vcfWriter::add);
        }
    }
    LOGGER.info("Written output variants to " + outputVcf);
}
 
Example 8
Source Project: picard   Source File: GatherVcfsTest.java    License: MIT License 6 votes vote down vote up
@Test(dataProvider = "vcfshards")
public void TestGatherFiles(final List<File> inputFiles, final File expectedOutput, final int expectedRetVal, boolean reorder) throws IOException {
    final String comment1 = "This is a comment";
    final List<String> args = new ArrayList<>();

    // Match the output extension (.vcf vs .vcf.gz) to the expected file's extension.
    final File output = VcfTestUtils.createTemporaryIndexedFile("result", expectedOutput.getAbsolutePath().endsWith(".vcf") ? ".vcf" : ".vcf.gz");

    inputFiles.forEach(f -> args.add("INPUT=" + f.getAbsolutePath()));
    args.add("OUTPUT=" + output.getAbsolutePath());
    args.add("COMMENT=" + comment1);
    args.add("REORDER_INPUT_BY_FIRST_VARIANT=" + reorder);

    Assert.assertEquals(runPicardCommandLine(args.toArray(new String[0])), expectedRetVal, "Program was expected to run successfully, but didn't.");

    if (expectedRetVal == 0) {
        // try-with-resources: the original leaked both readers whenever an
        // assertion failed before the manual close() calls.
        try (final VCFFileReader expectedReader = new VCFFileReader(expectedOutput, false);
             final VCFFileReader outputReader = new VCFFileReader(output, false)) {
            // The gathered header must carry the comment passed on the command line.
            Assert.assertTrue(outputReader.getFileHeader().getMetaDataInInputOrder().stream()
                    .anyMatch(h -> h.getKey().equals("GatherVcfs.comment") && h.getValue().equals(comment1)));
            Assert.assertEquals(expectedReader.iterator().stream().count(), outputReader.iterator().stream().count(), "The wrong number of variants was found.");
        }
    }
}
 
Example 9
@Test
public void testFeaturesHeader() throws Exception {
    // Build the tool and feed it a VCF via the --mask argument.
    final TestGATKToolWithFeatures tool = new TestGATKToolWithFeatures();
    final CommandLineParser clp = new CommandLineArgumentParser(tool);
    final File vcfFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/feature_data_source_test_with_bigHeader.vcf");
    final String[] args = {"--mask", vcfFile.getCanonicalPath()};
    clp.parseArguments(System.out, args);
    tool.onStartup();
    // The header the engine exposes for the feature input should be a VCFHeader.
    final Object headerForFeatures = tool.getHeaderForFeatures(tool.mask);
    Assert.assertTrue(headerForFeatures instanceof VCFHeader);
    final VCFHeader  vcfheaderForFeatures = (VCFHeader) headerForFeatures;

    try(final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false)){  //read the file directly and compare headers
        final VCFHeader vcfFileHeader= vcfReader.getFileHeader();
        // Compare each category of header line individually.
        Assert.assertEquals(vcfheaderForFeatures.getGenotypeSamples(), vcfFileHeader.getGenotypeSamples());
        Assert.assertEquals(vcfheaderForFeatures.getInfoHeaderLines(), vcfFileHeader.getInfoHeaderLines());
        Assert.assertEquals(vcfheaderForFeatures.getFormatHeaderLines(), vcfFileHeader.getFormatHeaderLines());
        Assert.assertEquals(vcfheaderForFeatures.getFilterLines(), vcfFileHeader.getFilterLines());
        Assert.assertEquals(vcfheaderForFeatures.getContigLines(), vcfFileHeader.getContigLines());
        Assert.assertEquals(vcfheaderForFeatures.getOtherHeaderLines(), vcfFileHeader.getOtherHeaderLines());
    }
    // Exercise the remaining tool lifecycle to make sure it completes cleanly.
    tool.doWork();
    tool.onShutdown();
}
 
Example 10
Source Project: genomewarp   Source File: VcfToVariantTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Note: HTSJDK cannot distinguish between PASS filters and
 * unfiltered for variant calls (it can for general filters).
 * As a result, all variant calls which PASS do not have filter
 * information applied.
 */
@Test
public void testGetCalls() throws Exception {
  File vcfFile =
      new File(VcfToVariant.class.getClassLoader().getResource(VALID_VCF_4_1).getFile());
  // try-with-resources: the original never closed the reader.
  try (VCFFileReader vcfReader = new VCFFileReader(vcfFile, false)) {
    VCFHeader vcfHeader = vcfReader.getFileHeader();
    int currVariant = 0;

    // Each converted call list must match the corresponding truth entry, in file order.
    for (final VariantContext vc : vcfReader) {
      List<VariantCall> callList = VcfToVariant.getCalls(vc, vcfHeader);
      assertEquals(callList, TRUTH.get(currVariant).getCallsList());
      currVariant++;
    }
  }
}
 
Example 11
Source Project: genomewarp   Source File: GenomeWarpSerialTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testGenerateBEDFromVCF() {
  String file = GenomeWarpSerialTest.class.getClassLoader().getResource(VCF_PATH).getFile();

  // Expected regions per chromosome.
  SortedMap<String, List<GenomeRange>> want = new TreeMap<String, List<GenomeRange>>() {{
    put("chr1", new ArrayList<GenomeRange>() {{
      add(new GenomeRange("chr1", 49, 50));
      add(new GenomeRange("chr1", 51, 52));
      add(new GenomeRange("chr1", 119, 122));
      add(new GenomeRange("chr1", 136, 137));
      add(new GenomeRange("chr1", 189, 190));
    }});
    put("chr2", new ArrayList<GenomeRange>() {{
      add(new GenomeRange("chr2", 139, 141));
    }});
  }};

  // try-with-resources: the original never closed the reader.
  SortedMap<String, List<GenomeRange>> got;
  try (VCFFileReader testVcf = new VCFFileReader(new File(file), false)) {
    got = GenomeRangeUtils.generateBEDFromVCF(testVcf, null);
  }
  assertEquals(got.size(), want.size());
  for (String key : got.keySet()) {
    assertTrue(GenomeWarpTestUtils.equivalentRanges(got.get(key), want.get(key)));
  }
}
 
Example 12
// Builds the expected hidden-state segment records from a GS VCF, one record
// per genotype per variant.
private List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> composeExpectedSegments(final File vcf, final TargetCollection<Target> targets) throws IOException {
    final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> result = new ArrayList<>();
    // try-with-resources: the original never closed the reader.
    try (final VCFFileReader reader = new VCFFileReader(vcf, false)) {
        reader.iterator().forEachRemaining(vc -> {
            final int targetCount = targets.indexRange(vc).size();
            for (final Genotype genotype : vc.getGenotypes()) {
                final int cn = Integer.parseInt(genotype.getExtendedAttribute("CN").toString());
                // BUGFIX: the original pattern "\\[\\]" only matches the literal
                // two-character sequence "[]" (i.e. an empty list) and never strips
                // the brackets of a non-empty list; "[\\[\\]]" is a character class
                // that removes each bracket individually. Behavior is unchanged when
                // the attribute string carries no brackets.
                final double[] cnp = Stream.of(genotype.getExtendedAttribute("CNP").toString().replaceAll("[\\[\\]]", "").split(","))
                        .mapToDouble(Double::parseDouble).toArray();

                final double cnpSum = MathUtils.approximateLog10SumLog10(cnp);
                final CopyNumberTriState call = expectedCall(cn);
                final double exactLog10Prob = expectedExactLog10(call, cnp);
                final HiddenStateSegment<CopyNumberTriState, Target> expectedSegment = new HiddenStateSegment<>(
                        new SimpleInterval(vc), targetCount, Double.parseDouble(genotype.getExtendedAttribute("CNF").toString()),
                        0.000, call, -10.0 * exactLog10Prob, Double.NaN,  Double.NaN, Double.NaN,
                          -10.0 * (cnp[ConvertGSVariantsToSegments.NEUTRAL_COPY_NUMBER_DEFAULT] - cnpSum)
                );
                result.add(new HiddenStateSegmentRecord<>(genotype.getSampleName(), expectedSegment));
            }
        });
    }
    return result;
}
 
Example 13
Source Project: picard   Source File: CollectSamErrorMetrics.java    License: MIT License 6 votes vote down vote up
/**
 * If there's a variant at the locus return true, otherwise false.
 * <p>
 * HAS SIDE EFFECTS!!! Queries the vcfFileReader
 *
 * @param vcfFileReader      a {@link VCFFileReader} to query for the given locus
 * @param locusInfo          a {@link SamLocusIterator.LocusInfo} at which to examine the variants
 * @return true if there's a variant over the locus, false otherwise.
 */
private static boolean checkLocus(final VCFFileReader vcfFileReader, final SamLocusIterator.LocusInfo locusInfo) {
    // No locus means no overlap.
    if (locusInfo == null) {
        return false;
    }

    // A single unfiltered variant overlapping the locus is enough.
    try (final CloseableIterator<VariantContext> vcfIterator = vcfFileReader.query(locusInfo)) {
        while (vcfIterator.hasNext()) {
            if (!vcfIterator.next().isFiltered()) {
                return true;
            }
        }
    }
    return false;
}
 
Example 14
Source Project: picard   Source File: SortVcf.java    License: MIT License 6 votes vote down vote up
/**
 * Merge the inputs and sort them by adding each input's content to a single SortingCollection.
 * <p/>
 * NB: It would be better to have a merging iterator as in MergeSamFiles, as this would perform better for pre-sorted inputs.
 * Here, we are assuming inputs are unsorted, and so adding their VariantContexts iteratively is fine for now.
 * MergeVcfs exists for simple merging of presorted inputs.
 *
 * @param readers      - a list of VCFFileReaders, one for each input VCF
 * @param outputHeader - The merged header whose information we intend to use in the final output file
 */
private SortingCollection<VariantContext> sortInputs(final List<VCFFileReader> readers, final VCFHeader outputHeader) {
    final ProgressLogger readProgress = new ProgressLogger(log, 25000, "read", "records");

    // NB: The default MAX_RECORDS_IN_RAM may not be appropriate here. VariantContexts are smaller than SamRecords
    // We would have to play around empirically to find an appropriate value. We are not performing this optimization at this time.
    // The boolean passed to VCFRecordCodec presumably relaxes validation when
    // stringency is not STRICT — confirm against the VCFRecordCodec constructor.
    final SortingCollection<VariantContext> sorter =
            SortingCollection.newInstance(
                    VariantContext.class,
                    new VCFRecordCodec(outputHeader, VALIDATION_STRINGENCY != ValidationStringency.STRICT),
                    outputHeader.getVCFRecordComparator(),
                    MAX_RECORDS_IN_RAM,
                    TMP_DIR);
    int readerCount = 1;
    // Drain every reader into the sorter, closing each reader as soon as it
    // has been fully consumed.
    for (final VCFFileReader reader : readers) {
        log.info("Reading entries from input file " + readerCount);
        for (final VariantContext variantContext : reader) {
            sorter.add(variantContext);
            readProgress.record(variantContext.getContig(), variantContext.getStart());
        }
        reader.close();
        readerCount++;
    }
    return sorter;
}
 
Example 15
Source Project: picard   Source File: FingerprintChecker.java    License: MIT License 6 votes vote down vote up
/**
 * Loads genotypes from the supplied file into one or more Fingerprint objects and returns them in a
 * Map of Sample->Fingerprint.
 *
 * @param fingerprintFile - VCF file containing genotypes for one or more samples
 * @param specificSample  - null to load genotypes for all samples contained in the file or the name
 *                        of an individual sample to load (and exclude all others).
 * @return a Map of Sample name to Fingerprint
 */
public Map<String, Fingerprint> loadFingerprints(final Path fingerprintFile, final String specificSample) {
    // NOTE(review): this reader is never closed in this method; the queryable
    // path below may consume it lazily downstream — confirm ownership before
    // adding a close here.
    final VCFFileReader reader = new VCFFileReader(fingerprintFile, false);
    checkDictionaryGoodForFingerprinting(reader.getFileHeader().getSequenceDictionary());

    final Map<String, Fingerprint> fingerprints;
    // Prefer indexed queries when possible; otherwise stream the whole file.
    if (reader.isQueryable()) {
        fingerprints = loadFingerprintsFromQueriableReader(reader, specificSample, fingerprintFile);
    } else {
        log.warn("Couldn't find index for file " + fingerprintFile + " going to read through it all.");
        fingerprints = loadFingerprintsFromVariantContexts(reader, specificSample, fingerprintFile);
    }
    //add an entry for each sample which was not fingerprinted
    reader.getFileHeader().getGenotypeSamples().forEach(sample -> fingerprints.computeIfAbsent(sample, s -> new Fingerprint(s, fingerprintFile, null)));

    return fingerprints;
}
 
Example 16
Source Project: picard   Source File: FingerprintChecker.java    License: MIT License 6 votes vote down vote up
/**
 * Loads genotypes from the supplied reader into one or more Fingerprint objects and returns them in a
 * Map of Sample->Fingerprint.
 *
 * @param reader         - VCF reader containing genotypes for one or more samples
 * @param specificSample - null to load genotypes for all samples contained in the file or the name
 *                       of an individual sample to load (and exclude all others).
 * @param source         The path of the source file used. used to emit errors.
 * @return a Map of Sample name to Fingerprint
 */
public Map<String, Fingerprint> loadFingerprintsFromQueriableReader(final VCFFileReader reader, final String specificSample, final Path source) {

    checkDictionaryGoodForFingerprinting(reader.getFileHeader().getSequenceDictionary());

    // Sorted so the supplied iterator visits SNP positions in a deterministic order.
    final SortedSet<Snp> snps = new TreeSet<>(haplotypes.getAllSnps());

    // Query the reader for each SNP position; positions with no record map to null.
    // NOTE(review): the CloseableIterator returned by each query() call is not
    // closed, and the supplier is consumed lazily by the callee — confirm both
    // before restructuring this.
    return loadFingerprintsFromVariantContexts(() ->
                    snps.stream().map(snp -> {
                        try {
                            return reader.query(snp.getChrom(), snp.getPos(), snp.getPos()).next();
                        } catch (NoSuchElementException e) {
                            return null;
                        }
                    }).iterator(),
            specificSample, source);
}
 
Example 17
Source Project: picard   Source File: InfiniumVcfFieldsTest.java    License: MIT License 6 votes vote down vote up
@Test
public void testGetValueFromVcfOtherHeaderLine() throws ParseException {
    // Exercise every accessor for "other" (non-standard) VCF header lines
    // against known values in the Infinium test VCF.
    try (final VCFFileReader in = new VCFFileReader(TEST_VCF_FILE, false)) {
        final VCFHeader header = in.getFileHeader();
        Assert.assertEquals(InfiniumVcfFields.getOptionalValueFromVcfOtherHeaderLine(header, InfiniumVcfFields.PIPELINE_VERSION), "IlluminaGenotypingArray_v1.5");
        Assert.assertEquals(InfiniumVcfFields.getIntegerFromVcfOtherHeaderLine(header, InfiniumVcfFields.ANALYSIS_VERSION_NUMBER).intValue(), 1);
        Assert.assertEquals(InfiniumVcfFields.getValueFromVcfOtherHeaderLine(header, InfiniumVcfFields.EXTENDED_ILLUMINA_MANIFEST_VERSION), "1.3");
        Assert.assertEquals(InfiniumVcfFields.getOptionalDoubleFromVcfOtherHeaderLine(header, InfiniumVcfFields.GTC_CALL_RATE), 0.985);
        Assert.assertEquals(InfiniumVcfFields.getOptionalValueFromVcfOtherHeaderLine(header, InfiniumVcfFields.AUTOCALL_GENDER), "M");
        // Missing keys must yield null from the Optional* accessors, not throw.
        Assert.assertNull(InfiniumVcfFields.getOptionalValueFromVcfOtherHeaderLine(header, "nothing"));
        Assert.assertNull(InfiniumVcfFields.getOptionalIntegerFromVcfOtherHeaderLine(header, "nothing"));
        Integer analysisVersionNumber = InfiniumVcfFields.getOptionalIntegerFromVcfOtherHeaderLine(header, InfiniumVcfFields.ANALYSIS_VERSION_NUMBER);
        Assert.assertNotNull(analysisVersionNumber);
        Assert.assertEquals(analysisVersionNumber.intValue(), 1);

        // Date-valued header lines are parsed with the supplied format.
        final Date truthAutocallDate = new Iso8601Date(autocallDateFormat.parse("04/18/2019 20:57"));
        final Iso8601Date autocallDate = InfiniumVcfFields.getDateFromVcfOtherHeaderLine(header, InfiniumVcfFields.AUTOCALL_DATE, autocallDateFormat);
        Assert.assertEquals(autocallDate, truthAutocallDate);
    }
}
 
Example 18
Source Project: Drop-seq   Source File: VCFUtils.java    License: MIT License 5 votes vote down vote up
/**
 * Does the VCF file use genotype qualities?
 * @param vcfReader reader whose header's FORMAT lines are inspected
 * @return true if the header declares a FORMAT line with ID "GQ"
 */
public static boolean GQInHeader (final VCFFileReader vcfReader) {
	return vcfReader.getFileHeader()
			.getFormatHeaderLines()
			.stream()
			.anyMatch(line -> line.getID().equals("GQ"));
}
 
Example 19
Source Project: Drop-seq   Source File: SequenceDictionaryIntersection.java    License: MIT License 5 votes vote down vote up
// Extracts a SAMSequenceDictionary from a heterogeneous object, dispatching on
// the caller-declared ObjectType rather than on the runtime class.
private static SAMSequenceDictionary getSequenceDictionary(final Object o, final ObjectType objectType) {
    switch(objectType) {
        case DICT:
            return (SAMSequenceDictionary)o;
        case BAM_HEADER:
            return ((SAMFileHeader) o).getSequenceDictionary();
        case BAM:
            return ((SamReader)o).getFileHeader().getSequenceDictionary();
        case VCF:
            return ((VCFFileReader)o).getFileHeader().getSequenceDictionary();
        case INTERVAL_LIST:
            return ((IntervalList)o).getHeader().getSequenceDictionary();
    }
    // Unreachable for the ObjectType values above; guards against future enum additions.
    throw new IllegalStateException("unpossible");
}
 
Example 20
Source Project: picard   Source File: TestFilterVcf.java    License: MIT License 5 votes vote down vote up
/**
 * Tests that all records get PASS set as their filter when extreme values are used for filtering.
 */
@Test(dataProvider = "goodInputVcfs")
public void testNoFiltering(final File input) throws Exception {
    final File out = testFiltering(input, ".vcf.gz", 0, 0, 0, Double.MAX_VALUE);
    // try-with-resources: the original leaked the reader when Assert.fail threw.
    try (final VCFFileReader in = new VCFFileReader(out, false)) {
        for (final VariantContext ctx : in) {
            // Every record must have had filters applied and ended up unfiltered (PASS).
            if (!ctx.filtersWereApplied() || ctx.isFiltered()) {
                Assert.fail("Context should not have been filtered: " + ctx.toString());
            }
        }
    }
}
 
Example 21
PurpleStructuralVariantSupplier(@NotNull final String version, @NotNull final String templateVCF, @NotNull final String outputVCF,
        @NotNull final String refGenomePath) {
    final VCFFileReader vcfReader = new VCFFileReader(new File(templateVCF), false);
    this.outputVCF = outputVCF;
    this.refGenomePath = refGenomePath;
    this.header = Optional.of(generateOutputHeader(version, vcfReader.getFileHeader()));
    this.variants = new VariantContextCollectionImpl(header.get());

    for (VariantContext context : vcfReader) {
        variants.add(context);
    }

    vcfReader.close();
}
 
Example 22
// Streams variants from filePath, detects and filters potential MNV regions,
// and writes the surviving regions to both a VCF and a BED file.
// NOTE(review): the reader/writers are closed manually at the end, so all
// three leak if an exception is thrown mid-loop — consider try-with-resources,
// but confirm the required close ordering (writers before reader) first.
private static void processVariants(@NotNull final String filePath, @NotNull final String outputVcf, @NotNull final String outputBed,
        boolean strelka) throws IOException {
    final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false);
    // In strelka mode the header is rewritten around the tumor genotype.
    final VCFHeader outputHeader =
            strelka ? generateOutputHeader(vcfReader.getFileHeader(), StrelkaPostProcess.TUMOR_GENOTYPE) : vcfReader.getFileHeader();
    final BufferedWriter bedWriter = new BufferedWriter(new FileWriter(outputBed, false));
    final VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVcf)
            .setReferenceDictionary(outputHeader.getSequenceDictionary())
            .build();
    vcfWriter.writeHeader(outputHeader);

    // Sliding pair of (current potential MNV region, completed region ready to flush).
    Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair = ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
    for (final VariantContext rawVariant : vcfReader) {
        final VariantContext variant =
                strelka ? StrelkaPostProcess.simplifyVariant(rawVariant, StrelkaPostProcess.TUMOR_GENOTYPE) : rawVariant;

        final PotentialMNVRegion potentialMNVregion = outputPair.getLeft();
        outputPair = MNVDetector.addMnvToRegion(potentialMNVregion, variant);
        // A completed region that survives filtering is written to both outputs.
        outputPair.getRight()
                .ifPresent(mnvRegion -> filterMnvRegion(mnvRegion).ifPresent(filteredRegion -> writeMnvRegionToFiles(filteredRegion,
                        vcfWriter,
                        bedWriter,
                        "\n")));
    }
    // Flush the final in-progress region (no trailing newline in the BED output).
    filterMnvRegion(outputPair.getLeft()).ifPresent(mnvRegion -> writeMnvRegionToFiles(mnvRegion, vcfWriter, bedWriter, ""));
    vcfWriter.close();
    vcfReader.close();
    bedWriter.close();
    LOGGER.info("Written output variants to {}. Written bed regions to {}.", outputVcf, outputBed);
}
 
Example 23
// Filters variants through StrelkaPostProcess, merges potential MNV regions
// and writes the result to outputVcf, validating merges against the tumor BAM.
private static void processVariants(@NotNull final String filePath, @NotNull final Slicer highConfidenceSlicer,
        @NotNull final String outputVcf, @NotNull final String sampleName, @NotNull final String tumorBam) {
    // Close reader and writer via try-with-resources; the original leaked both
    // if an exception was thrown mid-processing.
    try (final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false)) {
        final VCFHeader outputHeader = generateOutputHeader(vcfReader.getFileHeader(), sampleName);
        try (final VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(outputVcf)
                .setReferenceDictionary(outputHeader.getSequenceDictionary())
                .build()) {
            writer.writeHeader(outputHeader);
            final MNVValidator validator = ImmutableMNVValidator.of(tumorBam);
            final MNVMerger merger = ImmutableMNVMerger.of(outputHeader);

            // Sliding pair of (current potential MNV region, completed region ready to flush).
            Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair = ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());

            final VariantContextFilter filter = new StrelkaPostProcess(highConfidenceSlicer);
            for (final VariantContext variantContext : vcfReader) {
                if (filter.test(variantContext)) {
                    final VariantContext simplifiedVariant = StrelkaPostProcess.simplifyVariant(variantContext, sampleName);
                    final PotentialMNVRegion potentialMNV = outputPair.getLeft();
                    outputPair = MNVDetector.addMnvToRegion(potentialMNV, simplifiedVariant);
                    // A completed region is validated, merged and written out.
                    outputPair.getRight().ifPresent(mnvRegion -> validator.mergeVariants(mnvRegion, merger).forEach(writer::add));
                }
            }
            // Flush the final in-progress region.
            validator.mergeVariants(outputPair.getLeft(), merger).forEach(writer::add);
        }
    }
    LOGGER.info("Written output variants to " + outputVcf);
}
 
Example 24
@Test(dataProvider = "provideForTestCreateFuncotationsOnVariant")
public void testCreateFuncotationsOnVariant(final String variantFeatureDataFileName,
                                            final VariantContext variant,
                                            final ReferenceContext referenceContext,
                                            final List<Funcotation> expected) {
    // Make our factory:
    final VcfFuncotationFactory vcfFuncotationFactory =
            createVcfFuncotationFactory(FACTORY_NAME, FACTORY_VERSION, IOUtils.getPath(variantFeatureDataFileName));

    // Create features from the file. Close both the reader and the
    // CloseableIterator returned by query (the original leaked the iterator).
    final List<Feature> vcfFeatures;
    try (final VCFFileReader vcfReader = new VCFFileReader(IOUtils.getPath(variantFeatureDataFileName));
         final CloseableIterator<VariantContext> queryResults =
                 vcfReader.query(variant.getContig(), variant.getStart(), variant.getEnd())) {
        vcfFeatures = queryResults.stream().collect(Collectors.toList());
    }

    // Check to see if we're in business — both overloads must produce the
    // expected funcotations.
    Assert.assertEquals(
            vcfFuncotationFactory.createFuncotationsOnVariant(
                    variant,
                    referenceContext,
                    vcfFeatures
            ),
            expected
    );

    Assert.assertEquals(
            vcfFuncotationFactory.createFuncotationsOnVariant(
                    variant,
                    referenceContext,
                    vcfFeatures,
                    Collections.emptyList()
            ),
            expected
    );
}
 
Example 25
Source Project: picard   Source File: VcfTestUtils.java    License: MIT License 5 votes vote down vote up
// Asserts that two VCF files are equal by indexing both and comparing them
// through index-backed readers.
public static void assertVcfFilesAreEqual(final File actual, final File expected) throws IOException {
    // Index both inputs first so they can be opened with index-requiring readers.
    final File actualIndexed = createTemporaryIndexedVcfFromInput(actual, "assert");
    final File expectedIndexed = createTemporaryIndexedVcfFromInput(expected, "assert");

    try (final VCFFileReader actualReader = new VCFFileReader(actualIndexed);
         final VCFFileReader expectedReader = new VCFFileReader(expectedIndexed)) {
        assertEquals(actualReader, expectedReader);
    }
}
 
Example 26
// Runs the fast-QC step and verifies that the generated chunk VCF carries all
// 255 samples.
public void testCountSamplesInCreatedChunk() throws IOException {

	String configFolder = "test-data/configs/hapmap-chr1";
	String inputFolder = "test-data/data/simulated-chip-1chr-imputation";

	// create workflow context
	WorkflowTestContext context = buildContext(inputFolder, "hapmap2");

	// get output directory
	String out = context.getOutput("chunksDir");

	// create step instance
	FastQualityControlMock qcStats = new FastQualityControlMock(configFolder);

	// run and test
	assertTrue(run(context, qcStats));
	for (File file : new File(out).listFiles()) {
		if (file.getName().endsWith("chunk_1_80000001_100000000.vcf.gz")) {
			// try-with-resources: the original never closed the iterator and
			// leaked the reader when the assertion failed.
			try (VCFFileReader vcfReader = new VCFFileReader(file, false);
					CloseableIterator<VariantContext> it = vcfReader.iterator()) {
				if (it.hasNext()) {
					VariantContext a = it.next();
					// the chunk's first record must report all 255 samples
					assertEquals(255, a.getNSamples());
				}
			}
		}
	}
}
 
Example 27
/**
 * Runs input validation on a phased chr20 dataset, then opens the resulting
 * tabix-indexed VCF and counts the SNPs returned by an indexed query over the
 * whole chromosome (expected: 7824).
 */
public void testTabixIndexCreationChr20() throws IOException {

		String configFolder = "test-data/configs/hapmap-chr1";
		// input folder contains no vcf or vcf.gz files
		String inputFolder = "test-data/data/chr20-phased";

		// create workflow context
		WorkflowTestContext context = buildContext(inputFolder, "hapmap2");

		// create step instance
		InputValidation inputValidation = new InputValidationMock(configFolder);

		// run and test
		boolean result = run(context, inputValidation);

		// check if step is failed
		assertEquals(true, result);
		assertTrue(context.hasInMemory("[OK] 1 valid VCF file(s) found."));

		
		// test tabix index and count snps
		String vcfFilename = inputFolder + "/chr20.R50.merged.1.330k.recode.small.vcf.gz";
		int count = 0;
		// try-with-resources closes reader and iterator even if query() or an assertion throws
		try (VCFFileReader vcfReader = new VCFFileReader(new File(vcfFilename),
				new File(vcfFilename + TabixUtils.STANDARD_INDEX_EXTENSION), true);
				CloseableIterator<VariantContext> snps = vcfReader.query("20", 1, 1000000000)) {
			while (snps.hasNext()) {
				snps.next();
				count++;
			}
		}
		
		//check snps
		assertEquals(7824, count);

	}
 
Example 28
/**
 * Runs input validation (with eagle phasing configured) on a single-file chr1
 * dataset, then opens the resulting tabix-indexed VCF and counts the SNPs
 * returned by an indexed query over the whole chromosome (expected: 905).
 */
public void testTabixIndexCreationChr1() throws IOException {

		String configFolder = "test-data/configs/hapmap-chr1";
		// input folder contains no vcf or vcf.gz files
		String inputFolder = "test-data/data/single";

		// create workflow context
		WorkflowTestContext context = buildContext(inputFolder, "hapmap2");
		context.setInput("phasing", "eagle");

		// create step instance
		InputValidation inputValidation = new InputValidationMock(configFolder);

		// run and test
		boolean result = run(context, inputValidation);

		// check if step is failed
		assertEquals(true, result);
		assertTrue(context.hasInMemory("[OK] 1 valid VCF file(s) found."));

		
		// test tabix index and count snps
		String vcfFilename = inputFolder + "/minimac_test.50.vcf.gz";
		int count = 0;
		// try-with-resources closes reader and iterator even if query() or an assertion throws
		try (VCFFileReader vcfReader = new VCFFileReader(new File(vcfFilename),
				new File(vcfFilename + TabixUtils.STANDARD_INDEX_EXTENSION), true);
				CloseableIterator<VariantContext> snps = vcfReader.query("1", 1, 1000000000)) {
			while (snps.hasNext()) {
				snps.next();
				count++;
			}
		}
		
		//check snps
		assertEquals(905, count);

	}
 
Example 29
Source Project: genomewarp    Source File: VcfToVariant.java    License: Apache License 2.0
/**
 * Reads all records from a VCF file and converts them to {@code Variant} objects.
 *
 * @param filepath the VCF file to read
 * @param checkVersion if true, the file's VCF version is validated first
 * @return the converted variants, or {@code null} when {@code checkVersion} is set
 *     and the file's version is not supported (existing contract kept for callers)
 * @throws IOException if the file cannot be read
 */
public static List<Variant> convertVcfToVariant(File filepath, boolean checkVersion)
    throws IOException {
  // Check version
  if (checkVersion && !validVersion(filepath)) {
    return null;
  }

  // try-with-resources releases the underlying file handle; previously the
  // reader was never closed and leaked on every call.
  try (VCFFileReader vcfReader = new VCFFileReader(filepath, false)) {
    VCFHeader header = vcfReader.getFileHeader();
    return convertContextToVariant(vcfReader, header);
  }
}
 
Example 30
Source Project: genomewarp    Source File: GenomeRangeUtils.java    License: Apache License 2.0
/**
 * Performs improved preprocessing of the input confident regions.
 *
 * Splits the confident regions to do local remapping around variants in the input VCF to maximize
 * recall of variants.
 *
 * @param inputBEDPerChromosome map of chromosome to the list of confident regions. Confident regions must not contain non-DNA characters.
 * @param inputVcf input VCF file
 * @param bedWindowSize regions from the input BED will be split into subregions of at most this size
 * @return list of the GenomeRanges ready for lift over to the target genome
 */
public static List<GenomeRange> generateQueryBEDWithImprovedPreprocessing(
    SortedMap<String, List<GenomeRange>> inputBEDPerChromosome, String inputVcf,
    int bedWindowSize) {
  List<GenomeRange> queryBED = new ArrayList<>();

  logger.log(Level.INFO, "Generating regions from variants");
  // Open the reader in try-with-resources: previously it was never closed and
  // leaked a file handle on every call. generateBEDFromVCF materializes its
  // result, so closing the reader immediately afterwards is safe.
  SortedMap<String, List<GenomeRange>> fromVcfBEDPerChromosome;
  try (VCFFileReader vcfReader = new VCFFileReader(new File(inputVcf), false)) {
    fromVcfBEDPerChromosome = generateBEDFromVCF(vcfReader, inputBEDPerChromosome.keySet());
  }

  for (String chromosome: inputBEDPerChromosome.keySet()) {
    List<GenomeRange> inputBEDChr = inputBEDPerChromosome.get(chromosome);
    Collections.sort(inputBEDChr);

    // Chromosomes present in the BED but absent from the VCF contribute nothing.
    List<GenomeRange> fromVcfBEDChr = fromVcfBEDPerChromosome.get(chromosome);
    if (fromVcfBEDChr == null) {
      continue;
    }
    Collections.sort(fromVcfBEDChr);

    logger.log(Level.INFO,
        "Filtering out VCF regions not covered by confident regions");
    List<GenomeRange> filteredVcfBEDChr = filterOutNotCoveredVariants(inputBEDChr,
        fromVcfBEDChr);

    logger.log(Level.INFO, "Adding padding to VCF regions");
    List<GenomeRange> paddedVcfBEDChr = generatePaddedGenomeRanges(filteredVcfBEDChr);

    logger.log(Level.INFO, "Merging overlapping VCF regions");
    List<GenomeRange> mergedVcfBEDChr = mergeOverlaps(paddedVcfBEDChr);

    logger.log(Level.INFO, String
        .format("Merging query regions with regions from VCF (%d records) from chromosome %s",
            mergedVcfBEDChr.size(), chromosome));
    List<GenomeRange> intermediateBED = mergeRegionsFromQueryBEDAndVariants(
        inputBEDChr, mergedVcfBEDChr, bedWindowSize);
    queryBED.addAll(massageBED(intermediateBED));
  }
  return queryBED;
}