Java Code Examples for htsjdk.variant.vcf.VCFFileReader#close()

The following examples show how to use htsjdk.variant.vcf.VCFFileReader#close(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: MNVValidatorApplication.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Reads the variants in {@code filePath}, accumulates them into potential MNV regions, passes every completed
 * region through {@code validator.mergeVariants} and writes the resulting variants to {@code outputVcf}.
 *
 * @param strelka   when {@code true}, each raw variant is first simplified with
 *                  {@code StrelkaPostProcess.simplifyVariant(..., StrelkaPostProcess.TUMOR_GENOTYPE)}
 * @param filePath  path of the input VCF
 * @param outputVcf path of the VCF that is written
 * @param tumorBam  value used to build the {@code MNVValidator}
 */
private static void processVariants(boolean strelka, @NotNull final String filePath, @NotNull final String outputVcf,
        @NotNull final String tumorBam) {
    // try-with-resources releases the reader (and the writer below) even when processing throws.
    // The nesting keeps the original close order: writer first, then reader.
    try (final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false)) {
        final VCFHeader outputHeader = generateOutputHeader(vcfReader.getFileHeader(), "TUMOR");
        try (final VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVcf)
                .setReferenceDictionary(vcfReader.getFileHeader().getSequenceDictionary())
                .build()) {
            vcfWriter.writeHeader(outputHeader);
            final MNVValidator validator = ImmutableMNVValidator.of(tumorBam);
            final MNVMerger merger = ImmutableMNVMerger.of(outputHeader);
            // Left: the region still being built. Right: a region that was completed by the last added variant.
            Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair =
                    ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
            for (final VariantContext rawVariant : vcfReader) {
                final VariantContext simplifiedVariant =
                        strelka ? StrelkaPostProcess.simplifyVariant(rawVariant, StrelkaPostProcess.TUMOR_GENOTYPE) : rawVariant;

                final PotentialMNVRegion potentialMNV = outputPair.getLeft();
                outputPair = MNVDetector.addMnvToRegion(potentialMNV, simplifiedVariant);
                outputPair.getRight().ifPresent(mnvRegion -> validator.mergeVariants(mnvRegion, merger).forEach(vcfWriter::add));
            }
            // Flush the region that was still open when the input ended.
            validator.mergeVariants(outputPair.getLeft(), merger).forEach(vcfWriter::add);
        }
    }
    LOGGER.info("Written output variants to " + outputVcf);
}
 
Example 2
Source File: SortVcf.java    From picard with MIT License 6 votes vote down vote up
/**
 * Pours the records of every input reader into one SortingCollection so they can later be read back in the
 * order defined by the merged header's record comparator. Each reader is closed once it has been drained.
 * <p/>
 * NB: A merging iterator (as in MergeSamFiles) would perform better for pre-sorted inputs. Inputs are assumed to be
 * unsorted here, so adding their VariantContexts one by one is acceptable for now. MergeVcfs exists for simple
 * merging of presorted inputs.
 *
 * @param readers      - a list of VCFFileReaders, one for each input VCF
 * @param outputHeader - The merged header whose information we intend to use in the final output file
 * @return the collection holding every record read from {@code readers}
 */
private SortingCollection<VariantContext> sortInputs(final List<VCFFileReader> readers, final VCFHeader outputHeader) {
    final ProgressLogger readProgress = new ProgressLogger(log, 25000, "read", "records");

    // NB: The default MAX_RECORDS_IN_RAM may not be appropriate here, since VariantContexts are smaller than
    // SamRecords. Finding a better value would need empirical tuning, which is not attempted at this time.
    final boolean notStrict = VALIDATION_STRINGENCY != ValidationStringency.STRICT;
    final VCFRecordCodec codec = new VCFRecordCodec(outputHeader, notStrict);
    final SortingCollection<VariantContext> sorter = SortingCollection.newInstance(
            VariantContext.class, codec, outputHeader.getVCFRecordComparator(), MAX_RECORDS_IN_RAM, TMP_DIR);

    int inputNumber = 0;
    for (final VCFFileReader input : readers) {
        inputNumber++;
        log.info("Reading entries from input file " + inputNumber);
        input.forEach(record -> {
            sorter.add(record);
            readProgress.record(record.getContig(), record.getStart());
        });
        input.close();
    }
    return sorter;
}
 
Example 3
Source File: InputValidationTest.java    From imputationserver with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Runs the input validation step on the chr20 test data, then opens the VCF together with its tabix index
 * (presumably created by the validation step - confirm) and checks the number of records returned by a
 * whole-chromosome query.
 */
public void testTabixIndexCreationChr20() throws IOException {

		String configFolder = "test-data/configs/hapmap-chr1";
		// input folder holding the chr20 VCF that is queried below
		String inputFolder = "test-data/data/chr20-phased";

		// create workflow context
		WorkflowTestContext context = buildContext(inputFolder, "hapmap2");

		// create step instance
		InputValidation inputValidation = new InputValidationMock(configFolder);

		// run and test
		boolean result = run(context, inputValidation);

		// the step must succeed and report exactly one valid VCF
		assertTrue(result);
		assertTrue(context.hasInMemory("[OK] 1 valid VCF file(s) found."));

		// test tabix index and count snps
		String vcfFilename = inputFolder + "/chr20.R50.merged.1.330k.recode.small.vcf.gz";
		int count = 0;
		// try-with-resources closes the iterator first, then the reader, even if the query or iteration throws
		try (VCFFileReader vcfReader = new VCFFileReader(new File(vcfFilename),
				new File(vcfFilename + TabixUtils.STANDARD_INDEX_EXTENSION), true);
				CloseableIterator<VariantContext> snps = vcfReader.query("20", 1, 1000000000)) {
			while (snps.hasNext()) {
				snps.next();
				count++;
			}
		}

		// check snps
		assertEquals(7824, count);

	}
 
Example 4
Source File: ByIntervalListVariantContextIteratorTest.java    From picard with MIT License 5 votes vote down vote up
/**
 * Checks that a single-base interval on contig "2" yields exactly one variant, starting at that position.
 */
@Test
public void testSimpleOverlap() {
    final IntervalList intervalList = new IntervalList(header);
    intervalList.add(new Interval("2", 167166899, 167166899));
    // try-with-resources closes the reader even when an assertion fails
    try (final VCFFileReader reader = getReader(CEU_TRIOS_SNPS_VCF)) {
        final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
        Assert.assertTrue(iterator.hasNext());
        final VariantContext ctx = iterator.next();
        Assert.assertEquals(ctx.getStart(), 167166899);
        Assert.assertFalse(iterator.hasNext());
    }
}
 
Example 5
Source File: ByIntervalListVariantContextIteratorTest.java    From picard with MIT License 5 votes vote down vote up
/**
 * Checks that iterating the EMPTY_VCF input over an interval on the first dictionary sequence yields nothing.
 */
@Test
public void testNoVariants() {
    final IntervalList intervalList = new IntervalList(header);
    intervalList.add(new Interval(this.dict.getSequence(0).getSequenceName(), 1, 100));
    // try-with-resources closes the reader even when the assertion fails
    try (final VCFFileReader reader = getReader(EMPTY_VCF)) {
        final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
        Assert.assertFalse(iterator.hasNext());
    }
}
 
Example 6
Source File: TestFilterVcf.java    From picard with MIT License 5 votes vote down vote up
/**
 * Consumes a VCF and returns a ListMap whose keys are the IDs of filtered-out sites and whose values are the
 * filters applied to each of those sites.
 *
 * @param vcf the VCF file to read
 * @return a map from site ID to its filters; sites that are not filtered are omitted
 */
private ListMap<String, String> slurpFilters(final File vcf) {
    final ListMap<String, String> map = new ListMap<>();
    // try-with-resources closes the reader even if reading a record throws
    try (final VCFFileReader in = new VCFFileReader(vcf, false)) {
        for (final VariantContext ctx : in) {
            if (ctx.isNotFiltered()) {
                continue;
            }
            for (final String filter : ctx.getFilters()) {
                map.add(ctx.getID(), filter);
            }
        }
    }
    return map;
}
 
Example 7
Source File: ByIntervalListVariantContextIteratorTest.java    From picard with MIT License 5 votes vote down vote up
/**
 * Builds a SAMFileHeader whose sequence dictionary is taken from the header of CEU_TRIOS_SNPS_VCF.
 *
 * @return a new header carrying that VCF's sequence dictionary
 */
private SAMFileHeader getSAMFileHeader() {
    final SAMSequenceDictionary dict;
    // try-with-resources closes the reader even if reading the header throws
    try (final VCFFileReader reader = getReader(CEU_TRIOS_SNPS_VCF)) {
        dict = reader.getFileHeader().getSequenceDictionary();
    }
    final SAMFileHeader header = new SAMFileHeader();
    header.setSequenceDictionary(dict);
    return header;
}
 
Example 8
Source File: ByIntervalListVariantContextIteratorTest.java    From picard with MIT License 5 votes vote down vote up
/**
 * Checks that a variant covered by two separate intervals is returned by the iterator a single time.
 */
@Test
public void testVariantOverlappingMultipleIntervalsIsReturnedOnlyOnce() {
    final IntervalList intervalList = new IntervalList(header);
    intervalList.add(new Interval("12", 68921962, 68921962)); // deletion spans this
    intervalList.add(new Interval("12", 68921964, 68921964)); // deletion spans this
    // try-with-resources closes the reader even when an assertion fails
    try (final VCFFileReader reader = getReader(CEU_TRIOS_INDELS_VCF)) {
        final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
        Assert.assertTrue(iterator.hasNext());
        final VariantContext ctx = iterator.next();
        Assert.assertEquals(ctx.getStart(), 68921960);
        Assert.assertEquals(ctx.getEnd(), 68921966);
        Assert.assertFalse(iterator.hasNext());
    }
}
 
Example 9
Source File: RenameSampleInVcf.java    From picard with MIT License 5 votes vote down vote up
/**
 * Writes every record of INPUT to OUTPUT under a header whose only sample is NEW_SAMPLE_NAME.
 *
 * @return 0 once all records have been written
 * @throws IllegalArgumentException if INPUT has more than one sample, or if OLD_SAMPLE_NAME is set and differs
 *                                  from the sample found in INPUT
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    // try-with-resources closes the reader on every exit path, including the validation failures below
    try (final VCFFileReader in = new VCFFileReader(INPUT, false)) {
        final VCFHeader header = in.getFileHeader();

        if (header.getGenotypeSamples().size() > 1) {
            throw new IllegalArgumentException("Input VCF must be single-sample.");
        }

        if (OLD_SAMPLE_NAME != null && !OLD_SAMPLE_NAME.equals(header.getGenotypeSamples().get(0))) {
            throw new IllegalArgumentException("Input VCF did not contain expected sample. Contained: " + header.getGenotypeSamples().get(0));
        }

        final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
        if (CREATE_INDEX) {
            options.add(Options.INDEX_ON_THE_FLY);
        } else {
            options.remove(Options.INDEX_ON_THE_FLY);
        }

        final VCFHeader outHeader = new VCFHeader(header.getMetaDataInInputOrder(), CollectionUtil.makeList(NEW_SAMPLE_NAME));
        // The writer is closed before the reader, matching the original close order
        try (final VariantContextWriter out = new VariantContextWriterBuilder()
                .setOptions(options)
                .setOutputFile(OUTPUT).setReferenceDictionary(outHeader.getSequenceDictionary()).build()) {
            out.writeHeader(outHeader);

            for (final VariantContext ctx : in) {
                out.add(ctx);
            }
        }
    }

    return 0;
}
 
Example 10
Source File: VcfFileSegmentGenerator.java    From picard with MIT License 5 votes vote down vote up
/**
 * Reads the sequence records from the sequence dictionary in the header of the given VCF.
 *
 * @param vcf the VCF file whose header is read
 * @return the sequences of the header's sequence dictionary
 */
private static List<SAMSequenceRecord> readSequences(final File vcf) {
    final SAMSequenceDictionary dict;
    // try-with-resources closes the reader even if reading the header throws
    try (final VCFFileReader reader = new VCFFileReader(vcf)) {
        final VCFHeader header = reader.getFileHeader();
        dict = header.getSequenceDictionary();
    }
    return dict.getSequences();
}
 
Example 11
Source File: InputValidationTest.java    From imputationserver with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Runs the input validation step on the single-file chr1 test data with phasing set to "eagle", then opens
 * the VCF together with its tabix index (presumably created by the validation step - confirm) and checks the
 * number of records returned by a whole-chromosome query.
 */
public void testTabixIndexCreationChr1() throws IOException {

		String configFolder = "test-data/configs/hapmap-chr1";
		// input folder holding the chr1 VCF that is queried below
		String inputFolder = "test-data/data/single";

		// create workflow context
		WorkflowTestContext context = buildContext(inputFolder, "hapmap2");
		context.setInput("phasing", "eagle");

		// create step instance
		InputValidation inputValidation = new InputValidationMock(configFolder);

		// run and test
		boolean result = run(context, inputValidation);

		// the step must succeed and report exactly one valid VCF
		assertTrue(result);
		assertTrue(context.hasInMemory("[OK] 1 valid VCF file(s) found."));

		// test tabix index and count snps
		String vcfFilename = inputFolder + "/minimac_test.50.vcf.gz";
		int count = 0;
		// try-with-resources closes the iterator first, then the reader, even if the query or iteration throws
		try (VCFFileReader vcfReader = new VCFFileReader(new File(vcfFilename),
				new File(vcfFilename + TabixUtils.STANDARD_INDEX_EXTENSION), true);
				CloseableIterator<VariantContext> snps = vcfReader.query("1", 1, 1000000000)) {
			while (snps.hasNext()) {
				snps.next();
				count++;
			}
		}

		// check snps
		assertEquals(905, count);

	}
 
Example 12
Source File: PurpleStructuralVariantSupplier.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
PurpleStructuralVariantSupplier(@NotNull final String version, @NotNull final String templateVCF, @NotNull final String outputVCF,
        @NotNull final String refGenomePath) {
    final VCFFileReader vcfReader = new VCFFileReader(new File(templateVCF), false);
    this.outputVCF = outputVCF;
    this.refGenomePath = refGenomePath;
    this.header = Optional.of(generateOutputHeader(version, vcfReader.getFileHeader()));
    this.variants = new VariantContextCollectionImpl(header.get());

    for (VariantContext context : vcfReader) {
        variants.add(context);
    }

    vcfReader.close();
}
 
Example 13
Source File: ByIntervalListVariantContextIteratorTest.java    From picard with MIT License 5 votes vote down vote up
/**
 * Checks that an interval on contig "3" yields no variants from CEU_TRIOS_SNPS_VCF.
 */
@Test
public void testNoOverlapDifferentContig() {
    final IntervalList intervalList = new IntervalList(header);
    intervalList.add(new Interval("3", 167166899, 167166899));
    // try-with-resources closes the reader even when the assertion fails
    try (final VCFFileReader reader = getReader(CEU_TRIOS_SNPS_VCF)) {
        final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
        Assert.assertFalse(iterator.hasNext());
    }
}
 
Example 14
Source File: ThreadsafeTest.java    From picard with MIT License 5 votes vote down vote up
/**
 * This test doesn't even test the class; it just makes sure the corner-case test data really is a corner case:
 * querying position TEN_MILLION and position TEN_MILLION + 5 on contig "1" must return the same records.
 */
@Test
public void ensureTestDataActuallyHasWideVariantAtTenMillion() {
    final Joiner joiner = Joiner.on(":"); // Cheat: do a string compare
    // try-with-resources closes the reader even if a query or the assertion throws
    try (final VCFFileReader r = new VCFFileReader(VCF_WITH_MULTI_ALLELIC_VARIANT_AT_POSITION_10MILLION)) {
        Assert.assertEquals(
                joiner.join(r.query("1", TEN_MILLION, TEN_MILLION)),
                joiner.join(r.query("1", TEN_MILLION + 5, TEN_MILLION + 5))
        );
    }
}
 
Example 15
Source File: FastVCFFileReader.java    From imputationserver with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Opens the given VCF and reads the sample names from its header using a temporary VCFFileReader.
 *
 * @param vcfFilename path of the VCF file
 * @throws IOException declared by this constructor; presumably raised by the superclass constructor - confirm
 */
public FastVCFFileReader(String vcfFilename) throws IOException {

		super(vcfFilename);
		// load header; try-with-resources closes the temporary reader even if reading the header throws
		try (VCFFileReader reader = new VCFFileReader(new File(vcfFilename), false)) {
			VCFHeader header = reader.getFileHeader();
			samples = header.getGenotypeSamples();
			samplesCount = samples.size();
			variantContext = new MinimalVariantContext(samplesCount);
		}

		parser = new VCFLineParser(samplesCount);

	}
 
Example 16
Source File: StrelkaPostProcessApplication.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Reads the variants in {@code filePath}, keeps those accepted by a {@code StrelkaPostProcess} filter built from
 * {@code highConfidenceSlicer}, simplifies them, accumulates them into potential MNV regions, passes every
 * completed region through {@code validator.mergeVariants} and writes the resulting variants to {@code outputVcf}.
 *
 * @param filePath             path of the input VCF
 * @param highConfidenceSlicer slicer used to construct the variant filter
 * @param outputVcf            path of the VCF that is written
 * @param sampleName           sample name used for the output header and when simplifying variants
 * @param tumorBam             value used to build the {@code MNVValidator}
 */
private static void processVariants(@NotNull final String filePath, @NotNull final Slicer highConfidenceSlicer,
        @NotNull final String outputVcf, @NotNull final String sampleName, @NotNull final String tumorBam) {
    // try-with-resources releases the reader (and the writer below) even when processing throws.
    // The nesting keeps the original close order: writer first, then reader.
    try (final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false)) {
        final VCFHeader outputHeader = generateOutputHeader(vcfReader.getFileHeader(), sampleName);
        try (final VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(outputVcf)
                .setReferenceDictionary(outputHeader.getSequenceDictionary())
                .build()) {
            writer.writeHeader(outputHeader);
            final MNVValidator validator = ImmutableMNVValidator.of(tumorBam);
            final MNVMerger merger = ImmutableMNVMerger.of(outputHeader);

            // Left: the region still being built. Right: a region that was completed by the last added variant.
            Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair =
                    ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());

            final VariantContextFilter filter = new StrelkaPostProcess(highConfidenceSlicer);
            for (final VariantContext variantContext : vcfReader) {
                if (filter.test(variantContext)) {
                    final VariantContext simplifiedVariant = StrelkaPostProcess.simplifyVariant(variantContext, sampleName);
                    final PotentialMNVRegion potentialMNV = outputPair.getLeft();
                    outputPair = MNVDetector.addMnvToRegion(potentialMNV, simplifiedVariant);
                    outputPair.getRight().ifPresent(mnvRegion -> validator.mergeVariants(mnvRegion, merger).forEach(writer::add));
                }
            }
            // Flush the region that was still open when the input ended.
            validator.mergeVariants(outputPair.getLeft(), merger).forEach(writer::add);
        }
    }
    LOGGER.info("Written output variants to " + outputVcf);
}
 
Example 17
Source File: MNVDetectorApplication.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Reads the variants in {@code filePath}, accumulates them into potential MNV regions and, for every region that
 * passes {@code filterMnvRegion}, writes it to both the output VCF and the output BED file.
 *
 * @param filePath  path of the input VCF
 * @param outputVcf path of the VCF that is written
 * @param outputBed path of the BED file that is written (overwritten, not appended)
 * @param strelka   when {@code true}, the output header is regenerated and each raw variant is simplified with
 *                  {@code StrelkaPostProcess.TUMOR_GENOTYPE}; otherwise the input header and variants are used as-is
 * @throws IOException if the BED file cannot be opened or closed
 */
private static void processVariants(@NotNull final String filePath, @NotNull final String outputVcf, @NotNull final String outputBed,
        boolean strelka) throws IOException {
    // try-with-resources releases all three resources even when processing throws.
    // Close order is now vcfWriter, bedWriter, vcfReader; the resources are independent, so the order is not significant.
    try (final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false)) {
        final VCFHeader outputHeader =
                strelka ? generateOutputHeader(vcfReader.getFileHeader(), StrelkaPostProcess.TUMOR_GENOTYPE) : vcfReader.getFileHeader();
        try (final BufferedWriter bedWriter = new BufferedWriter(new FileWriter(outputBed, false));
                final VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVcf)
                        .setReferenceDictionary(outputHeader.getSequenceDictionary())
                        .build()) {
            vcfWriter.writeHeader(outputHeader);

            // Left: the region still being built. Right: a region that was completed by the last added variant.
            Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair =
                    ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
            for (final VariantContext rawVariant : vcfReader) {
                final VariantContext variant =
                        strelka ? StrelkaPostProcess.simplifyVariant(rawVariant, StrelkaPostProcess.TUMOR_GENOTYPE) : rawVariant;

                final PotentialMNVRegion potentialMNVregion = outputPair.getLeft();
                outputPair = MNVDetector.addMnvToRegion(potentialMNVregion, variant);
                outputPair.getRight()
                        .ifPresent(mnvRegion -> filterMnvRegion(mnvRegion).ifPresent(filteredRegion -> writeMnvRegionToFiles(filteredRegion,
                                vcfWriter,
                                bedWriter,
                                "\n")));
            }
            // Flush the region that was still open when the input ended; the last argument is "" here instead of "\n".
            filterMnvRegion(outputPair.getLeft()).ifPresent(mnvRegion -> writeMnvRegionToFiles(mnvRegion, vcfWriter, bedWriter, ""));
        }
    }
    LOGGER.info("Written output variants to {}. Written bed regions to {}.", outputVcf, outputBed);
}
 
Example 18
Source File: PonApplication.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds and writes one panel per sequence of the dictionary found in the first input file.
 * <p>
 * For each sequence, one task per file matching GLOB in the {@code input} directory is submitted to
 * {@code executorService}; once all tasks have completed, the builder's result is written to {@code vcf}.
 * Does nothing when {@code files} is empty.
 *
 * @throws IOException          if the input directory cannot be listed
 * @throws ExecutionException   if one of the submitted tasks failed
 * @throws InterruptedException if the thread is interrupted while waiting for a task
 */
private void run() throws IOException, ExecutionException, InterruptedException {

        if (files.isEmpty()) {
            return;
        }

        final SAMSequenceDictionary dictionary;
        // try-with-resources closes the reader even if reading the header throws
        try (final VCFFileReader dictionaryReader = new VCFFileReader(files.get(0), true)) {
            dictionary = dictionaryReader.getFileHeader().getSequenceDictionary();
        }

        for (SAMSequenceRecord samSequenceRecord : dictionary.getSequences()) {
            LOGGER.info("Processing sequence {}", samSequenceRecord.getSequenceName());
            final PonBuilder ponBuilder = new PonBuilder();
            final RunnableTaskCompletion runnableTaskCompletion = new RunnableTaskCompletion();

            List<Future<?>> contigFutures = Lists.newArrayList();

            // A DirectoryStream holds an open directory handle and must be closed; previously one was leaked per
            // sequence. The submitted tasks only use the Path values, so closing right after submission is safe.
            try (java.nio.file.DirectoryStream<Path> inputFiles = Files.newDirectoryStream(new File(input).toPath(), GLOB)) {
                for (Path file : inputFiles) {
                    Runnable runnable = () -> addVariantsFromFileToBuilder(ponBuilder, samSequenceRecord, file);
                    contigFutures.add(executorService.submit(runnableTaskCompletion.task(runnable)));
                }
            }

            // Wait for every file of this sequence before writing its result.
            for (Future<?> contigFuture : contigFutures) {
                contigFuture.get();
            }

            vcf.write(ponBuilder.build());
        }
    }
 
Example 19
Source File: ImputationChrXTest.java    From imputationserver with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Runs QC, imputation (in "phasing" mode) and export on the unphased chrX test data, extracts the resulting
 * archive and checks that the phased VCF is flagged as phased and carries the expected genotypes for two
 * samples at position 44322058. The test returns early, without failing, when the reference BCF is absent.
 */
@Test
public void testPipelineChrXWithEaglePhasingOnly() throws IOException, ZipException {
	
	// NOTE(review): this guard looks under "hapmap-chrX-hg38" while configFolder below is "hapmap-chrX" - confirm intended.
	if (!new File(
			"test-data/configs/hapmap-chrX-hg38/ref-panels/ALL.X.nonPAR.phase1_v3.snps_indels_svs.genotypes.all.noSingleton.recode.hg38.bcf")
					.exists()) {
		System.out.println("chrX bcf nonPAR file not available");
		return;
	}


	String configFolder = "test-data/configs/hapmap-chrX";
	String inputFolder = "test-data/data/chrX-unphased";

	// create workflow context
	WorkflowTestContext context = buildContext(inputFolder, "phase1");
	
	context.setInput("mode", "phasing");

	// run qc to create chunkfile
	QcStatisticsMock qcStats = new QcStatisticsMock(configFolder);
	boolean result = run(context, qcStats);

	assertTrue(result);

	// add panel to hdfs
	importRefPanel(FileUtil.path(configFolder, "ref-panels"));
	// importMinimacMap("test-data/B38_MAP_FILE.map");
	importBinaries("files/bin");

	// run imputation
	ImputationMinimac3Mock imputation = new ImputationMinimac3Mock(configFolder);
	result = run(context, imputation);
	assertTrue(result);

	// run export
	CompressionEncryptionMock export = new CompressionEncryptionMock("files");
	result = run(context, export);
	assertTrue(result);

	ZipFile zipFile = new ZipFile("test-data/tmp/local/chr_X.zip", PASSWORD.toCharArray());
	zipFile.extractAll("test-data/tmp");

	VcfFile vcfFile = VcfFileUtil.load("test-data/tmp/chrX.phased.vcf.gz", 100000000, false);
	
	assertTrue(vcfFile.isPhased());
	
	// try-with-resources closes the iterator (previously never closed) and the reader, even when an assertion fails
	try (VCFFileReader vcfReader = new VCFFileReader(new File(vcfFile.getVcfFilename()), false);
			CloseableIterator<VariantContext> it = vcfReader.iterator()) {

		while (it.hasNext()) {

			VariantContext line = it.next();

			if (line.getStart() == 44322058) {
				assertEquals("A", line.getGenotype("HG00096").getGenotypeString());
				System.out.println(line.getGenotype("HG00097").getGenotypeString());
				assertEquals("A|A", line.getGenotype("HG00097").getGenotypeString());
			}
		}
	}

	FileUtil.deleteDirectory("test-data/tmp");

}
 
Example 20
Source File: ImputationChrXTest.java    From imputationserver with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Runs QC, imputation and export on the phased chrX leave-one-out test data, extracts the resulting archive
 * and checks the het / hom-ref / hom-var genotype counts of the record at position 26963697 in the dose VCF.
 * The temporary directory is removed before the run (if present) and after it.
 */
@Test
public void testChrXLeaveOneOutPipelinePhased() throws IOException, ZipException {

	// SNP 26963697 from input excluded and imputed!
	// true genotypes:
	// 1,1|1,1|1,1|1,1,1|1,1,1|1,1|1,1,0,1|1,1|0,1,1,1,1,1,1|1,1,1|1,1|1,1|1,1|1,1|1,1|0,

	String configFolder = "test-data/configs/hapmap-chrX";
	String inputFolder = "test-data/data/chrX-phased-loo";

	File file = new File("test-data/tmp");
	if (file.exists()) {
		FileUtil.deleteDirectory(file);
	}

	// create workflow context
	WorkflowTestContext context = buildContext(inputFolder, "phase1");

	// run qc to create chunkfile
	QcStatisticsMock qcStats = new QcStatisticsMock(configFolder);
	boolean result = run(context, qcStats);

	assertTrue(result);

	// add panel to hdfs
	importRefPanel(FileUtil.path(configFolder, "ref-panels"));
	// importMinimacMap("test-data/B38_MAP_FILE.map");
	importBinaries("files/bin");

	// run imputation
	ImputationMinimac3Mock imputation = new ImputationMinimac3Mock(configFolder);
	result = run(context, imputation);
	assertTrue(result);

	// run export
	CompressionEncryptionMock export = new CompressionEncryptionMock("files");
	result = run(context, export);
	assertTrue(result);

	ZipFile zipFile = new ZipFile("test-data/tmp/local/chr_X.zip", PASSWORD.toCharArray());
	zipFile.extractAll("test-data/tmp");

	VcfFile vcfFile = VcfFileUtil.load("test-data/tmp/chrX.dose.vcf.gz", 100000000, false);

	// try-with-resources closes the iterator (previously never closed) and the reader, even when an assertion fails
	try (VCFFileReader vcfReader = new VCFFileReader(new File(vcfFile.getVcfFilename()), false);
			CloseableIterator<VariantContext> it = vcfReader.iterator()) {

		while (it.hasNext()) {

			VariantContext line = it.next();

			if (line.getStart() == 26963697) {
				assertEquals(2, line.getHetCount());
				assertEquals(1, line.getHomRefCount());
				assertEquals(23, line.getHomVarCount());

			}
		}
	}

	FileUtil.deleteDirectory(file);

}