htsjdk.variant.vcf.VCFFileReader Java Examples

The following examples show how to use htsjdk.variant.vcf.VCFFileReader. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: VcfFuncotationFactoryUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private Triple<VariantContext, ReferenceContext, List<Feature>> createDummyCacheTriples(final List<String> alleles, final int offset) {

        // Create an interval for a variant that overlaps an entry in the exac snippet test file when offset = 0.
        final SimpleInterval variantInterval = new SimpleInterval("3", 13372+offset, 13372+offset);
        final VariantContext vc =  new VariantContextBuilder()
                .chr(variantInterval.getContig()).start(variantInterval.getStart()).stop(variantInterval.getEnd())
                .alleles(alleles)
                .make();
        final ReferenceContext referenceContext = new ReferenceContext(ReferenceDataSource.of(Paths.get(FuncotatorReferenceTestUtils.retrieveB37Chr3Ref())), variantInterval);
        final List<Feature> vcfFeatures;
        try (final VCFFileReader vcfReader = new VCFFileReader(IOUtils.getPath(EXAC_SNIPPET))) {
            vcfFeatures = vcfReader.query(variantInterval.getContig(), variantInterval.getStart(), variantInterval.getEnd()).stream().collect(Collectors.toList());
        }

        return Triple.of(vc, referenceContext, vcfFeatures);
    }
 
Example #2
Source File: InfiniumVcfFieldsTest.java    From picard with MIT License 6 votes vote down vote up
@Test
public void testGetValueFromVcfOtherHeaderLine() throws ParseException {
    try (final VCFFileReader in = new VCFFileReader(TEST_VCF_FILE, false)) {
        final VCFHeader header = in.getFileHeader();
        Assert.assertEquals(InfiniumVcfFields.getOptionalValueFromVcfOtherHeaderLine(header, InfiniumVcfFields.PIPELINE_VERSION), "IlluminaGenotypingArray_v1.5");
        Assert.assertEquals(InfiniumVcfFields.getIntegerFromVcfOtherHeaderLine(header, InfiniumVcfFields.ANALYSIS_VERSION_NUMBER).intValue(), 1);
        Assert.assertEquals(InfiniumVcfFields.getValueFromVcfOtherHeaderLine(header, InfiniumVcfFields.EXTENDED_ILLUMINA_MANIFEST_VERSION), "1.3");
        Assert.assertEquals(InfiniumVcfFields.getOptionalDoubleFromVcfOtherHeaderLine(header, InfiniumVcfFields.GTC_CALL_RATE), 0.985);
        Assert.assertEquals(InfiniumVcfFields.getOptionalValueFromVcfOtherHeaderLine(header, InfiniumVcfFields.AUTOCALL_GENDER), "M");
        Assert.assertNull(InfiniumVcfFields.getOptionalValueFromVcfOtherHeaderLine(header, "nothing"));
        Assert.assertNull(InfiniumVcfFields.getOptionalIntegerFromVcfOtherHeaderLine(header, "nothing"));
        Integer analysisVersionNumber = InfiniumVcfFields.getOptionalIntegerFromVcfOtherHeaderLine(header, InfiniumVcfFields.ANALYSIS_VERSION_NUMBER);
        Assert.assertNotNull(analysisVersionNumber);
        Assert.assertEquals(analysisVersionNumber.intValue(), 1);

        final Date truthAutocallDate = new Iso8601Date(autocallDateFormat.parse("04/18/2019 20:57"));
        final Iso8601Date autocallDate = InfiniumVcfFields.getDateFromVcfOtherHeaderLine(header, InfiniumVcfFields.AUTOCALL_DATE, autocallDateFormat);
        Assert.assertEquals(autocallDate, truthAutocallDate);
    }
}
 
Example #3
Source File: VcfToVariantTest.java    From genomewarp with Apache License 2.0 6 votes vote down vote up
@Test
/**
 * Note: HTSJDK cannot distinguish between PASS filters and
 * unfiltered for variant calls (it can for general filters).
 * As a result, all variant calls which PASS do not have filter
 * information applied.
 */
public void testGetCalls() throws Exception {
  File vcfFile =
      new File(VcfToVariant.class.getClassLoader().getResource(VALID_VCF_4_1).getFile());
  VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
  VCFHeader vcfHeader = vcfReader.getFileHeader();
  int currVariant = 0;

  for (final VariantContext vc : vcfReader) {
    List<VariantCall> callList = VcfToVariant.getCalls(vc, vcfHeader);
    assertEquals(callList, TRUTH.get(currVariant).getCallsList());
    currVariant++;
  }
}
 
Example #4
Source File: FingerprintChecker.java    From picard with MIT License 6 votes vote down vote up
/**
 * Loads genotypes from the supplied file into one or more Fingerprint objects and returns them in a
 * Map of Sample->Fingerprint.
 *
 * @param fingerprintFile - VCF file containing genotypes for one or more samples
 * @param specificSample  - null to load genotypes for all samples contained in the file or the name
 *                        of an individual sample to load (and exclude all others).
 * @return a Map of Sample name to Fingerprint
 */
public Map<String, Fingerprint> loadFingerprints(final Path fingerprintFile, final String specificSample) {
    final VCFFileReader reader = new VCFFileReader(fingerprintFile, false);
    checkDictionaryGoodForFingerprinting(reader.getFileHeader().getSequenceDictionary());

    final Map<String, Fingerprint> fingerprints;
    if (reader.isQueryable()) {
        fingerprints = loadFingerprintsFromQueriableReader(reader, specificSample, fingerprintFile);
    } else {
        log.warn("Couldn't find index for file " + fingerprintFile + " going to read through it all.");
        fingerprints = loadFingerprintsFromVariantContexts(reader, specificSample, fingerprintFile);
    }
    //add an entry for each sample which was not fingerprinted
    reader.getFileHeader().getGenotypeSamples().forEach(sample -> fingerprints.computeIfAbsent(sample, s -> new Fingerprint(s, fingerprintFile, null)));

    return fingerprints;
}
 
Example #5
Source File: GenomeWarpSerialTest.java    From genomewarp with Apache License 2.0 6 votes vote down vote up
@Test
public void testGenerateBEDFromVCF() {
  String file = GenomeWarpSerialTest.class.getClassLoader().getResource(VCF_PATH).getFile();
  VCFFileReader testVcf = new VCFFileReader(new File(file), false);

  SortedMap<String, List<GenomeRange>> want = new TreeMap<String, List<GenomeRange>>() {{
    put("chr1", new ArrayList<GenomeRange>() {{
      add(new GenomeRange("chr1", 49, 50));
      add(new GenomeRange("chr1", 51, 52));
      add(new GenomeRange("chr1", 119, 122));
      add(new GenomeRange("chr1", 136, 137));
      add(new GenomeRange("chr1", 189, 190));
    }});
    put("chr2", new ArrayList<GenomeRange>() {{
      add(new GenomeRange("chr2", 139, 141));
    }});
  }};

  SortedMap<String, List<GenomeRange>> got = GenomeRangeUtils.generateBEDFromVCF(testVcf, null);
  assertEquals(got.size(), want.size());
  for(String key: got.keySet()) {
    assertTrue(GenomeWarpTestUtils.equivalentRanges(got.get(key), want.get(key)));
  }
}
 
Example #6
Source File: GATKToolUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testFeaturesHeader() throws Exception {
    final TestGATKToolWithFeatures tool = new TestGATKToolWithFeatures();
    final CommandLineParser clp = new CommandLineArgumentParser(tool);
    final File vcfFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/feature_data_source_test_with_bigHeader.vcf");
    final String[] args = {"--mask", vcfFile.getCanonicalPath()};
    clp.parseArguments(System.out, args);
    tool.onStartup();
    final Object headerForFeatures = tool.getHeaderForFeatures(tool.mask);
    Assert.assertTrue(headerForFeatures instanceof VCFHeader);
    final VCFHeader  vcfheaderForFeatures = (VCFHeader) headerForFeatures;

    try(final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false)){  //read the file directly and compare headers
        final VCFHeader vcfFileHeader= vcfReader.getFileHeader();
        Assert.assertEquals(vcfheaderForFeatures.getGenotypeSamples(), vcfFileHeader.getGenotypeSamples());
        Assert.assertEquals(vcfheaderForFeatures.getInfoHeaderLines(), vcfFileHeader.getInfoHeaderLines());
        Assert.assertEquals(vcfheaderForFeatures.getFormatHeaderLines(), vcfFileHeader.getFormatHeaderLines());
        Assert.assertEquals(vcfheaderForFeatures.getFilterLines(), vcfFileHeader.getFilterLines());
        Assert.assertEquals(vcfheaderForFeatures.getContigLines(), vcfFileHeader.getContigLines());
        Assert.assertEquals(vcfheaderForFeatures.getOtherHeaderLines(), vcfFileHeader.getOtherHeaderLines());
    }
    tool.doWork();
    tool.onShutdown();
}
 
Example #7
Source File: ConvertGSVariantsToSegmentsIntegrationTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> composeExpectedSegments(final File vcf, final TargetCollection<Target> targets) throws IOException {
    final VCFFileReader reader = new VCFFileReader(vcf, false);
    final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> result = new ArrayList<>();
    reader.iterator().forEachRemaining(vc -> {
        final int targetCount = targets.indexRange(vc).size();
        for (final Genotype genotype : vc.getGenotypes()) {
            final int cn = Integer.parseInt(genotype.getExtendedAttribute("CN").toString());
            final double[] cnp = Stream.of(genotype.getExtendedAttribute("CNP").toString().replaceAll("\\[\\]", "").split(","))
                    .mapToDouble(Double::parseDouble).toArray();

            final double cnpSum = MathUtils.approximateLog10SumLog10(cnp);
            final CopyNumberTriState call = expectedCall(cn);
            final double exactLog10Prob = expectedExactLog10(call, cnp);
            final HiddenStateSegment<CopyNumberTriState, Target> expectedSegment = new HiddenStateSegment<>(
                    new SimpleInterval(vc), targetCount, Double.parseDouble(genotype.getExtendedAttribute("CNF").toString()),
                    0.000, call, -10.0 * exactLog10Prob, Double.NaN,  Double.NaN, Double.NaN,
                      -10.0 * (cnp[ConvertGSVariantsToSegments.NEUTRAL_COPY_NUMBER_DEFAULT] - cnpSum)
            );
            result.add(new HiddenStateSegmentRecord<>(genotype.getSampleName(), expectedSegment));
        }
    });
    return result;
}
 
Example #8
Source File: MNVValidatorApplication.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
private static void processVariants(boolean strelka, @NotNull final String filePath, @NotNull final String outputVcf,
        @NotNull final String tumorBam) {
    final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false);
    final VCFHeader outputHeader = generateOutputHeader(vcfReader.getFileHeader(), "TUMOR");
    final VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVcf)
            .setReferenceDictionary(vcfReader.getFileHeader().getSequenceDictionary())
            .build();
    vcfWriter.writeHeader(outputHeader);
    final MNVValidator validator = ImmutableMNVValidator.of(tumorBam);
    final MNVMerger merger = ImmutableMNVMerger.of(outputHeader);
    Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair = ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
    for (final VariantContext rawVariant : vcfReader) {
        final VariantContext simplifiedVariant =
                strelka ? StrelkaPostProcess.simplifyVariant(rawVariant, StrelkaPostProcess.TUMOR_GENOTYPE) : rawVariant;

        final PotentialMNVRegion potentialMNV = outputPair.getLeft();
        outputPair = MNVDetector.addMnvToRegion(potentialMNV, simplifiedVariant);
        outputPair.getRight().ifPresent(mnvRegion -> validator.mergeVariants(mnvRegion, merger).forEach(vcfWriter::add));
    }
    validator.mergeVariants(outputPair.getLeft(), merger).forEach(vcfWriter::add);
    vcfWriter.close();
    vcfReader.close();
    LOGGER.info("Written output variants to " + outputVcf);
}
 
Example #9
Source File: GatherVcfsTest.java    From picard with MIT License 6 votes vote down vote up
@Test(dataProvider = "vcfshards")
public void TestGatherFiles(final List<File> inputFiles, final File expectedOutput, final int expectedRetVal, boolean reorder) throws IOException {
    final String comment1 = "This is a comment";
    final List<String> args = new ArrayList<>();

    final File output = VcfTestUtils.createTemporaryIndexedFile("result", expectedOutput.getAbsolutePath().endsWith(".vcf") ? ".vcf" : ".vcf.gz");

    inputFiles.forEach(f -> args.add("INPUT=" + f.getAbsolutePath()));
    args.add("OUTPUT=" + output.getAbsolutePath());
    args.add("COMMENT=" + comment1);
    args.add("REORDER_INPUT_BY_FIRST_VARIANT=" +  reorder);

    Assert.assertEquals(runPicardCommandLine(args.toArray(new String[args.size()])), expectedRetVal, "Program was expected to run successfully, but didn't.");

    if (expectedRetVal == 0) {
        final VCFFileReader expectedReader = new VCFFileReader(expectedOutput, false);
        final VCFFileReader outputReader = new VCFFileReader(output, false);
        Assert.assertTrue(outputReader.getFileHeader().getMetaDataInInputOrder().stream().anyMatch(H->H.getKey().equals("GatherVcfs.comment") && H.getValue().equals(comment1)));
        Assert.assertEquals(expectedReader.iterator().stream().count(), outputReader.iterator().stream().count(), "The wrong number of variants was found.");
        outputReader.close();
        expectedReader.close();
    }
}
 
Example #10
Source File: AbstractVcfMergingClpTester.java    From picard with MIT License 6 votes vote down vote up
/**
 * Make sure that the order of the output file is identical to the order
 * of the input files by iterating through the output, making sure that,
 * if the context is an indel (snp), the next genomic position in the indel
 * (snp) queue is the same. Also make sure that the context is in the order
 * specified by the input files.
 */
private void validateSnpAndIndelResults(final File output, final Queue<String> indelContigPositions, final Queue<String> snpContigPositions) {
    final VCFFileReader outputReader = new VCFFileReader(output, false);
    final VariantContextComparator outputComparator = outputReader.getFileHeader().getVCFRecordComparator();
    VariantContext last = null;
    final CloseableIterator<VariantContext> iterator = outputReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext outputContext = iterator.next();
        if (outputContext.isIndel()) Assert.assertEquals(getContigPosition(outputContext), indelContigPositions.poll());
        if (outputContext.isSNP()) Assert.assertEquals(getContigPosition(outputContext), snpContigPositions.poll());
        if (last != null) Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0);
        last = outputContext;
    }
    iterator.close();

    // We should have polled everything off the indel (snp) queues
    Assert.assertEquals(indelContigPositions.size(), 0);
    Assert.assertEquals(snpContigPositions.size(), 0);
}
 
Example #11
Source File: TestFilterVcf.java    From picard with MIT License 6 votes vote down vote up
@Test(dataProvider = "goodInputVcfs")
public void testJavaScript(final File input) throws Exception {
    final File out = VcfTestUtils.createTemporaryIndexedFile("filterVcfTestJS.", ".vcf");
    final FilterVcf filterer = new FilterVcf();
    filterer.INPUT = input;
    filterer.OUTPUT = out;
    filterer.JAVASCRIPT_FILE = quickJavascriptFilter("variant.getStart()%5 != 0");

    final int retval = filterer.doWork();
    Assert.assertEquals(retval, 0);

    //count the number of reads
    final int expectedNumber = 4;
    int count = 0;
    VCFFileReader in = new VCFFileReader(filterer.OUTPUT, false);
    CloseableIterator<VariantContext> iter = in.iterator();
    while (iter.hasNext()) {
        final VariantContext ctx = iter.next();
        count += (ctx.isFiltered() ? 1 : 0);
    }
    iter.close();
    in.close();
    Assert.assertEquals(count, expectedNumber);
}
 
Example #12
Source File: CpxVariantReInterpreterSparkIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private static void vcfEquivalenceTest(final String generatedVCFPath, final String expectedVCFPath,
                                       final List<String> attributesToIgnore, final boolean onHDFS) throws Exception {

    List<VariantContext> expectedVcs;
    try (final VCFFileReader fileReader = new VCFFileReader(new File(expectedVCFPath), false) ) {
        try (final CloseableIterator<VariantContext> iterator = fileReader.iterator()) {
            expectedVcs = Utils.stream(iterator).collect(Collectors.toList());
        }
    }

    final List<VariantContext> actualVcs = StructuralVariationDiscoveryPipelineSparkIntegrationTest
            .extractActualVCs(generatedVCFPath, onHDFS);

    GATKBaseTest.assertCondition(actualVcs, expectedVcs,
            (a, e) -> VariantContextTestUtils.assertVariantContextsAreEqual(a, e, attributesToIgnore, Collections.emptyList()));
}
 
Example #13
Source File: SortVcf.java    From picard with MIT License 6 votes vote down vote up
/**
 * Merge the inputs and sort them by adding each input's content to a single SortingCollection.
 * <p/>
 * NB: It would be better to have a merging iterator as in MergeSamFiles, as this would perform better for pre-sorted inputs.
 * Here, we are assuming inputs are unsorted, and so adding their VariantContexts iteratively is fine for now.
 * MergeVcfs exists for simple merging of presorted inputs.
 *
 * @param readers      - a list of VCFFileReaders, one for each input VCF
 * @param outputHeader - The merged header whose information we intend to use in the final output file
 */
private SortingCollection<VariantContext> sortInputs(final List<VCFFileReader> readers, final VCFHeader outputHeader) {
    final ProgressLogger readProgress = new ProgressLogger(log, 25000, "read", "records");

    // NB: The default MAX_RECORDS_IN_RAM may not be appropriate here. VariantContexts are smaller than SamRecords
    // We would have to play around empirically to find an appropriate value. We are not performing this optimization at this time.
    final SortingCollection<VariantContext> sorter =
            SortingCollection.newInstance(
                    VariantContext.class,
                    new VCFRecordCodec(outputHeader, VALIDATION_STRINGENCY != ValidationStringency.STRICT),
                    outputHeader.getVCFRecordComparator(),
                    MAX_RECORDS_IN_RAM,
                    TMP_DIR);
    int readerCount = 1;
    for (final VCFFileReader reader : readers) {
        log.info("Reading entries from input file " + readerCount);
        for (final VariantContext variantContext : reader) {
            sorter.add(variantContext);
            readProgress.record(variantContext.getContig(), variantContext.getStart());
        }
        reader.close();
        readerCount++;
    }
    return sorter;
}
 
Example #14
Source File: AnnotateStrelkaWithAllelicDepth.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
private AnnotateStrelkaWithAllelicDepth(final Options options, final String... args) throws ParseException {
    final CommandLine cmd = createCommandLine(args, options);
    if (!cmd.hasOption(VCF_IN)) {
        throw new ParseException(VCF_IN + " is a mandatory argument");
    }

    if (!cmd.hasOption(VCF_OUT)) {
        throw new ParseException(VCF_OUT + " is a mandatory argument");
    }

    inputVCF = cmd.getOptionValue(VCF_IN);
    outputVCF = cmd.getOptionValue(VCF_OUT);

    vcfReader = new VCFFileReader(new File(inputVCF), false);
    header = generateOutputHeader(vcfReader.getFileHeader());
    vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVCF)
            .setReferenceDictionary(header.getSequenceDictionary())
            .setIndexCreator(new TabixIndexCreator(header.getSequenceDictionary(), new TabixFormat()))
            .setOption(htsjdk.variant.variantcontext.writer.Options.ALLOW_MISSING_FIELDS_IN_HEADER)
            .build();
}
 
Example #15
Source File: FingerprintChecker.java    From picard with MIT License 6 votes vote down vote up
/**
 * Loads genotypes from the supplied reader into one or more Fingerprint objects and returns them in a
 * Map of Sample->Fingerprint.
 *
 * @param reader         - VCF reader containing genotypes for one or more samples
 * @param specificSample - null to load genotypes for all samples contained in the file or the name
 *                       of an individual sample to load (and exclude all others).
 * @param source         The path of the source file used. used to emit errors.
 * @return a Map of Sample name to Fingerprint
 */
public Map<String, Fingerprint> loadFingerprintsFromQueriableReader(final VCFFileReader reader, final String specificSample, final Path source) {

    checkDictionaryGoodForFingerprinting(reader.getFileHeader().getSequenceDictionary());

    final SortedSet<Snp> snps = new TreeSet<>(haplotypes.getAllSnps());

    return loadFingerprintsFromVariantContexts(() ->
                    snps.stream().map(snp -> {
                        try {
                            return reader.query(snp.getChrom(), snp.getPos(), snp.getPos()).next();
                        } catch (NoSuchElementException e) {
                            return null;
                        }
                    }).iterator(),
            specificSample, source);
}
 
Example #16
Source File: CollectSamErrorMetrics.java    From picard with MIT License 6 votes vote down vote up
/**
 * If there's a variant at the locus return true, otherwise false.
 * <p>
 * HAS SIDE EFFECTS!!! Queries the vcfFileReader
 *
 * @param vcfFileReader      a {@link VCFFileReader} to query for the given locus
 * @param locusInfo          a {@link SamLocusIterator.LocusInfo} at which to examine the variants
 * @return true if there's a variant over the locus, false otherwise.
 */
private static boolean checkLocus(final VCFFileReader vcfFileReader, final SamLocusIterator.LocusInfo locusInfo) {
    boolean overlaps = false;

    if (locusInfo != null) {
        try (final CloseableIterator<VariantContext> vcfIterator = vcfFileReader.query(locusInfo)) {

            while (vcfIterator.hasNext()) {
                if (vcfIterator.next().isFiltered()) {
                    continue;
                }
                overlaps = true;
                break;
            }
        }
    }
    return overlaps;
}
 
Example #17
Source File: ByIntervalListVariantContextIteratorTest.java    From picard with MIT License 5 votes vote down vote up
@Test
public void testNoOverlapDifferentContig() {
    final IntervalList intervalList         = new IntervalList(header);
    intervalList.add(new Interval("3", 167166899, 167166899));
    final VCFFileReader reader              = getReader(CEU_TRIOS_SNPS_VCF);
    final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
    Assert.assertFalse(iterator.hasNext());
    reader.close();
}
 
Example #18
Source File: UpdateVCFSequenceDictionaryIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private SAMSequenceDictionary updateSequenceDictionary(
    final File inputVariantsFile,
    final File inputSourceFile,
    final File inputReferenceFile,
    final String masterSequenceDictionary,
    final boolean replace,
    final boolean disableSequenceDictionaryValidation)
{
    ArgumentsBuilder argBuilder = new ArgumentsBuilder();

    argBuilder.addVCF(inputVariantsFile);
    if (inputReferenceFile != null) {
        argBuilder.addReference(inputReferenceFile);
    }
    if (inputSourceFile != null) {
        argBuilder.add(UpdateVCFSequenceDictionary.DICTIONARY_ARGUMENT_NAME, inputSourceFile);
    }
    if (replace) {
        argBuilder.add(UpdateVCFSequenceDictionary.REPLACE_ARGUMENT_NAME, Boolean.toString(replace));
    }
    if (disableSequenceDictionaryValidation) {
        argBuilder.add(StandardArgumentDefinitions.DISABLE_SEQUENCE_DICT_VALIDATION_NAME, Boolean.toString(disableSequenceDictionaryValidation));
    }
    if (masterSequenceDictionary != null) {
        argBuilder.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, masterSequenceDictionary);
    }

    File outFile = createTempFile("updateSequenceDictionary", ".vcf.gz");
    argBuilder.addOutput(outFile);
    runCommandLine(argBuilder.getArgsList());

    // Don't require an index, since the framework doesn't create one if no input sequence
    // dictionary is available via getBestAvailableSequenceDictionary.
    try (VCFFileReader vcfReader = new VCFFileReader(outFile, false)) {
        return vcfReader.getFileHeader().getSequenceDictionary();
    }
}
 
Example #19
Source File: ByIntervalListVariantContextIteratorTest.java    From picard with MIT License 5 votes vote down vote up
@Test
public void testEmptyIntervalList() {
    final IntervalList intervalList         = new IntervalList(header);
    final VCFFileReader reader              = getReader(CEU_TRIOS_SNPS_VCF);
    final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
    Assert.assertFalse(iterator.hasNext());
    reader.close();
}
 
Example #20
Source File: ByIntervalListVariantContextIteratorTest.java    From picard with MIT License 5 votes vote down vote up
@Test
public void testSimpleOverlap() {
    final IntervalList intervalList         = new IntervalList(header);
    intervalList.add(new Interval("2", 167166899, 167166899));
    final VCFFileReader reader              = getReader(CEU_TRIOS_SNPS_VCF);
    final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
    Assert.assertTrue(iterator.hasNext());
    final VariantContext ctx = iterator.next();
    Assert.assertEquals(ctx.getStart(), 167166899);
    Assert.assertFalse(iterator.hasNext());
    reader.close();
}
 
Example #21
Source File: ByIntervalListVariantContextIteratorTest.java    From picard with MIT License 5 votes vote down vote up
@Test
public void testNoVariants() {
    final IntervalList intervalList         = new IntervalList(header);
    intervalList.add(new Interval(this.dict.getSequence(0).getSequenceName(), 1, 100));
    final VCFFileReader reader              = getReader(EMPTY_VCF);
    final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
    Assert.assertFalse(iterator.hasNext());
    reader.close();
}
 
Example #22
Source File: VcfFormatConverter.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    final ProgressLogger progress = new ProgressLogger(LOG, 10000);
    
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final VCFFileReader reader = new VCFFileReader(INPUT, REQUIRE_INDEX);
    final VCFHeader header = new VCFHeader(reader.getFileHeader());
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available in the input file when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    final VariantContextWriter writer = builder.build();
    writer.writeHeader(header);
    final CloseableIterator<VariantContext> iterator = reader.iterator();

    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        writer.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(reader);
    writer.close();

    return 0;
}
 
Example #23
Source File: UpdateVcfSequenceDictionary.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath());

    final VCFFileReader fileReader = new VCFFileReader(INPUT, false);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(samSequenceDictionary)
            .clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);

    final VariantContextWriter vcfWriter = builder.setOutputFile(OUTPUT).build();
    fileHeader.setSequenceDictionary(samSequenceDictionary);
    vcfWriter.writeHeader(fileHeader);

    final ProgressLogger progress = new ProgressLogger(log, 10000);
    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        vcfWriter.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(fileReader);
    vcfWriter.close();

    return 0;
}
 
Example #24
Source File: LiftoverVcfTest.java    From picard with MIT License 5 votes vote down vote up
@Test
public void testChangingInfoFields() {
    final String filename = "testLiftoverFailingVariants.vcf";
    final File liftOutputFile = new File(OUTPUT_DATA_PATH, "lift-delete-me.vcf");
    final File rejectOutputFile = new File(OUTPUT_DATA_PATH, "reject-delete-me.vcf");
    final File input = new File(TEST_DATA_PATH, filename);

    liftOutputFile.deleteOnExit();
    rejectOutputFile.deleteOnExit();

    final String[] args = new String[]{
            "INPUT=" + input.getAbsolutePath(),
            "OUTPUT=" + liftOutputFile.getAbsolutePath(),
            "REJECT=" + rejectOutputFile.getAbsolutePath(),
            "CHAIN=" + CHAIN_FILE,
            "REFERENCE_SEQUENCE=" + REFERENCE_FILE,
            "RECOVER_SWAPPED_REF_ALT=true",
            "CREATE_INDEX=false"
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);

    final VCFFileReader liftReader = new VCFFileReader(liftOutputFile, false);
    for (final VariantContext vc : liftReader) {
        Assert.assertTrue(vc.hasAttribute("AF"));

        Assert.assertEquals(vc.hasAttribute(LiftoverUtils.SWAPPED_ALLELES), vc.hasAttribute("FLIPPED_AF"), vc.toString());
        Assert.assertEquals(!vc.hasAttribute(LiftoverUtils.SWAPPED_ALLELES), vc.hasAttribute("MAX_AF") && vc.getAttribute("MAX_AF") != null, vc.toString());

        if (vc.hasAttribute(LiftoverUtils.SWAPPED_ALLELES)) {
            final double af = vc.getAttributeAsDouble("AF", -1.0);
            final double flippedAf = vc.getAttributeAsDouble("FLIPPED_AF", -2.0);

            Assert.assertTrue(TestNGUtil.compareDoubleWithAccuracy(af, flippedAf, 0.01),
                    "Error while comparing AF. expected " + flippedAf + " but found " + af);
        }
    }
}
 
Example #25
Source File: RenameSampleInVcf.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final VCFFileReader in = new VCFFileReader(INPUT, false);
    final VCFHeader header = in.getFileHeader();

    if (header.getGenotypeSamples().size() > 1) {
        throw new IllegalArgumentException("Input VCF must be single-sample.");
    }

    if (OLD_SAMPLE_NAME != null && !OLD_SAMPLE_NAME.equals(header.getGenotypeSamples().get(0))) {
        throw new IllegalArgumentException("Input VCF did not contain expected sample. Contained: " + header.getGenotypeSamples().get(0));
    }

    final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
    if (CREATE_INDEX) options.add(Options.INDEX_ON_THE_FLY); else options.remove(Options.INDEX_ON_THE_FLY);

    final VCFHeader outHeader = new VCFHeader(header.getMetaDataInInputOrder(), CollectionUtil.makeList(NEW_SAMPLE_NAME));
    final VariantContextWriter out = new VariantContextWriterBuilder()
            .setOptions(options)
            .setOutputFile(OUTPUT).setReferenceDictionary(outHeader.getSequenceDictionary()).build();
    out.writeHeader(outHeader);

    for (final VariantContext ctx : in) {
        out.add(ctx);
    }

    out.close();
    in.close();

    return 0;
}
 
Example #26
Source File: SortVcf.java    From picard with MIT License 5 votes vote down vote up
private void collectFileReadersAndHeaders(final List<String> sampleList, SAMSequenceDictionary samSequenceDictionary) {
    for (final File input : INPUT) {
        final VCFFileReader in = new VCFFileReader(input, false);
        final VCFHeader header = in.getFileHeader();
        final SAMSequenceDictionary dict = in.getFileHeader().getSequenceDictionary();
        if (dict == null || dict.isEmpty()) {
            if (null == samSequenceDictionary) {
                throw new IllegalArgumentException("Sequence dictionary was missing or empty for the VCF: " + input.getAbsolutePath() + " Please add a sequence dictionary to this VCF or specify SEQUENCE_DICTIONARY.");
            }
            header.setSequenceDictionary(samSequenceDictionary);
        } else {
            if (null == samSequenceDictionary) {
                samSequenceDictionary = dict;
            } else {
                try {
                    samSequenceDictionary.assertSameDictionary(dict);
                } catch (final AssertionError e) {
                    throw new IllegalArgumentException(e);
                }
            }
        }
        if (sampleList.isEmpty()) {
            sampleList.addAll(header.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(header.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + input.getAbsolutePath() + " has sample names that don't match the other files.");
            }
        }
        inputReaders.add(in);
        inputHeaders.add(header);
    }
}
 
Example #27
Source File: VariantIteratorProducer.java    From picard with MIT License 5 votes vote down vote up
@Override
public void close() {
    final Iterator<VCFFileReader> i = allReaders.iterator();
    while (i.hasNext()) {
        i.next().close();
        i.remove();
    }
}
 
Example #28
Source File: VariantIteratorProducer.java    From picard with MIT License 5 votes vote down vote up
@Override
protected CollectionUtil.DefaultingMap<File, VCFFileReader> initialValue() {
    return new CollectionUtil.DefaultingMap<>(file -> {
        final VCFFileReader reader = new VCFFileReader(file);
        LOG.debug(String.format("Producing a reader of %s for %s.", file, Thread.currentThread()));
        allReaders.add(reader);
        return reader;
    }, true);
}
 
Example #29
Source File: VcfFileSegmentGenerator.java    From picard with MIT License 5 votes vote down vote up
private static List<SAMSequenceRecord> readSequences(final File vcf) {
    final VCFFileReader reader = new VCFFileReader(vcf);
    final VCFHeader header = reader.getFileHeader();
    final SAMSequenceDictionary dict = header.getSequenceDictionary();
    reader.close();
    return dict.getSequences();
}
 
Example #30
Source File: ThreadsafeTest.java    From picard with MIT License 5 votes vote down vote up
/** This test doesn't even test the class, it just makes sure the cornercase test data is really a cornercase */
@Test
public void ensureTestDataActuallyHasWideVariantAtTenMillion() {
    final Joiner joiner = Joiner.on(":"); // Cheat: do a string compare
    final VCFFileReader r = new VCFFileReader(VCF_WITH_MULTI_ALLELIC_VARIANT_AT_POSITION_10MILLION);
    Assert.assertEquals(
            joiner.join(r.query("1", TEN_MILLION, TEN_MILLION)),
            joiner.join(r.query("1", TEN_MILLION + 5, TEN_MILLION + 5))
    );
    r.close();
}