Java Code Examples for htsjdk.variant.vcf.VCFFileReader#iterator()

The following examples show how to use htsjdk.variant.vcf.VCFFileReader#iterator() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestFilterVcf.java    From picard with MIT License 6 votes vote down vote up
@Test(dataProvider = "goodInputVcfs")
public void testJavaScript(final File input) throws Exception {
    final File out = VcfTestUtils.createTemporaryIndexedFile("filterVcfTestJS.", ".vcf");
    final FilterVcf filterer = new FilterVcf();
    filterer.INPUT = input;
    filterer.OUTPUT = out;
    filterer.JAVASCRIPT_FILE = quickJavascriptFilter("variant.getStart()%5 != 0");

    final int retval = filterer.doWork();
    Assert.assertEquals(retval, 0);

    //count the number of reads
    final int expectedNumber = 4;
    int count = 0;
    VCFFileReader in = new VCFFileReader(filterer.OUTPUT, false);
    CloseableIterator<VariantContext> iter = in.iterator();
    while (iter.hasNext()) {
        final VariantContext ctx = iter.next();
        count += (ctx.isFiltered() ? 1 : 0);
    }
    iter.close();
    in.close();
    Assert.assertEquals(count, expectedNumber);
}
 
Example 2
Source File: AbstractVcfMergingClpTester.java    From picard with MIT License 6 votes vote down vote up
/**
 * Make sure that the order of the output file is identical to the order
 * of the input files by iterating through the output, making sure that,
 * if the context is an indel (snp), the next genomic position in the indel
 * (snp) queue is the same. Also make sure that the context is in the order
 * specified by the input files.
 */
private void validateSnpAndIndelResults(final File output, final Queue<String> indelContigPositions, final Queue<String> snpContigPositions) {
    final VCFFileReader outputReader = new VCFFileReader(output, false);
    final VariantContextComparator outputComparator = outputReader.getFileHeader().getVCFRecordComparator();
    VariantContext last = null;
    final CloseableIterator<VariantContext> iterator = outputReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext outputContext = iterator.next();
        if (outputContext.isIndel()) Assert.assertEquals(getContigPosition(outputContext), indelContigPositions.poll());
        if (outputContext.isSNP()) Assert.assertEquals(getContigPosition(outputContext), snpContigPositions.poll());
        if (last != null) Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0);
        last = outputContext;
    }
    iterator.close();

    // We should have polled everything off the indel (snp) queues
    Assert.assertEquals(indelContigPositions.size(), 0);
    Assert.assertEquals(snpContigPositions.size(), 0);
}
 
Example 3
Source File: SortVcfsTest.java    From picard with MIT License 6 votes vote down vote up
/**
 * Checks the ordering and total number of variant context entries in the specified output VCF file.
 * Does NOT check explicitly that the VC genomic positions match exactly those from the inputs. We assume this behavior from other tests.
 *
 * @param output VCF file representing the output of SortVCF
 * @param expectedVariantContextCount the total number of variant context entries from all input files that were merged/sorted
 */
private void validateSortingResults(final File output, final int expectedVariantContextCount) {
    final VCFFileReader outputReader = new VCFFileReader(output, false);
    final VariantContextComparator outputComparator = outputReader.getFileHeader().getVCFRecordComparator();
    VariantContext last = null;
    int variantContextCount = 0;
    final CloseableIterator<VariantContext> iterator = outputReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext outputContext = iterator.next();
        if (last != null) Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0);
        last = outputContext;
        variantContextCount++;
    }
    iterator.close();
    Assert.assertEquals(variantContextCount, expectedVariantContextCount);
}
 
Example 4
Source File: UpdateVcfSequenceDictionary.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath());

    final VCFFileReader fileReader = new VCFFileReader(INPUT, false);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(samSequenceDictionary)
            .clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);

    final VariantContextWriter vcfWriter = builder.setOutputFile(OUTPUT).build();
    fileHeader.setSequenceDictionary(samSequenceDictionary);
    vcfWriter.writeHeader(fileHeader);

    final ProgressLogger progress = new ProgressLogger(log, 10000);
    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        vcfWriter.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(fileReader);
    vcfWriter.close();

    return 0;
}
 
Example 5
Source File: DbSnpBitSetUtil.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/** Private helper method to read through the VCF and create one or more bit sets. */
private static void loadVcf(final File dbSnpFile,
                            final SAMSequenceDictionary sequenceDictionary,
                            final Map<DbSnpBitSetUtil, Set<DbSnpVariantType>> bitSetsToVariantTypes) {

    final VCFFileReader variantReader = new VCFFileReader(dbSnpFile);
    final CloseableIterator<VariantContext> variantIterator = variantReader.iterator();

    while (variantIterator.hasNext()) {
        final VariantContext kv = variantIterator.next();

        for (final Map.Entry<DbSnpBitSetUtil, Set<DbSnpVariantType>> tuple : bitSetsToVariantTypes.entrySet()) {
            final DbSnpBitSetUtil bitset            = tuple.getKey();
            final Set<DbSnpVariantType> variantsToMatch  = tuple.getValue();

            BitSet bits = bitset.sequenceToBitSet.get(kv.getContig());
            if (bits == null) {
                final int nBits;
                if (sequenceDictionary == null) nBits = kv.getEnd() + 1;
                else nBits = sequenceDictionary.getSequence(kv.getContig()).getSequenceLength() + 1;
                bits = new BitSet(nBits);
                bitset.sequenceToBitSet.put(kv.getContig(), bits);
            }
            if (variantsToMatch.isEmpty() ||
                    (kv.isSNP() && variantsToMatch.contains(DbSnpVariantType.SNP)) ||
                    (kv.isIndel() && variantsToMatch.contains(DbSnpVariantType.insertion)) ||
                    (kv.isIndel() && variantsToMatch.contains(DbSnpVariantType.deletion))) {

                for (int i = kv.getStart(); i <= kv.getEnd(); i++)  bits.set(i, true);
            }
        }
    }

    CloserUtil.close(variantIterator);
    CloserUtil.close(variantReader);
}
 
Example 6
Source File: TestVCFInputFormat.java    From Hadoop-BAM with MIT License 5 votes vote down vote up
@Test
public void countEntries() throws Exception {
    VCFFileReader vcfFileReader =
        new VCFFileReader(new File("src/test/resources/" + filename), false);
    Iterator<VariantContext> variantIterator;
    if (interval == null) {
        variantIterator = vcfFileReader.iterator();
    } else {
        variantIterator = vcfFileReader.query(interval.getContig(),
            interval.getStart(), interval.getEnd());
    }
    int expectedCount = Iterators.size(variantIterator);

    int counter = 0;
    for (RecordReader<LongWritable, VariantContextWritable> reader : readers) {
        while (reader.nextKeyValue()) {
            writable = reader.getCurrentValue();
            assertNotNull(writable);
            VariantContext vc = writable.get();
            assertNotNull(vc);
            String value = vc.toString();
            assertNotNull(value);
            counter++;
        }
    }
    assertEquals(expectedCount, counter);
}
 
Example 7
Source File: HaplotypeMapTest.java    From picard with MIT License 5 votes vote down vote up
@Test(dataProvider = "haplotypeMapForWriting")
public void testHaplotypeMapWriteToVcf(final HaplotypeMap haplotypeMap) throws Exception {
    final File temp = File.createTempFile("haplotypeMap", ".vcf");
    temp.deleteOnExit();
    haplotypeMap.writeAsVcf(temp, TEST_FASTA);

    final VCFFileReader reader = new VCFFileReader(temp);

    Assert.assertEquals(reader.getFileHeader().getNGenotypeSamples(), 1, "VCF should have exactly one sample");
    Assert.assertEquals(reader.getFileHeader().getSampleNamesInOrder().get(0), HaplotypeMap.HET_GENOTYPE_FOR_PHASING, "VCF sole sample should be " + HaplotypeMap.HET_GENOTYPE_FOR_PHASING);

    final Iterator<VariantContext> iter = reader.iterator();
    final VariantContext first = iter.next();
    Assert.assertEquals(first.getContig(), "chr1", "Wrong chromosome on first snp: " + first);
    Assert.assertEquals(first.getID(), "snp1", "Wrong name on first snp: " + first);
    Assert.assertEquals(first.getGenotype(0).getExtendedAttribute(VCFConstants.PHASE_SET_KEY), Integer.toString(first.getStart()), "anchor snp should have PS equal to its position " + first);
    Assert.assertEquals(first.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 0D), 1 - 0.15); // because it's swapped w.r.t the reference

    final VariantContext second = iter.next();
    Assert.assertEquals(second.getContig(), "chr1", "Wrong chromosome on second snp: " + second);
    Assert.assertEquals(second.getID(), "snp2", "Wrong name on second snp: " + second);
    Assert.assertEquals(second.getGenotype(0).getExtendedAttribute(VCFConstants.PHASE_SET_KEY), Integer.toString(first.getStart()), "Phase set is incorrect on second snp: " + second);
    Assert.assertEquals(second.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 0D), 0.16);

    final VariantContext third = iter.next();
    Assert.assertEquals(third.getContig(), "chr2", "Wrong chromosome on third snp: " + third);
    Assert.assertEquals(third.getID(), "snp3", "Wrong name on third snp: " + third);
    Assert.assertFalse (third.getGenotype(0).hasAnyAttribute(VCFConstants.PHASE_SET_KEY), "Third snp should not have a phaseset" + third);
    Assert.assertEquals(third.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 0D), 0.2);
}
 
Example 8
Source File: AbstractVcfMergingClpTester.java    From picard with MIT License 5 votes vote down vote up
static Queue<String> loadContigPositions(final File inputFile) {
	final VCFFileReader reader = new VCFFileReader(inputFile, false);
	final Queue<String> contigPositions = new LinkedList<String>();
	final CloseableIterator<VariantContext> iterator = reader.iterator();
	while (iterator.hasNext()) contigPositions.add(getContigPosition(iterator.next()));
	iterator.close();
	reader.close();
	return contigPositions;
}
 
Example 9
Source File: CollectIndependentReplicateMetrics.java    From picard with MIT License 5 votes vote down vote up
private SortedMap<QueryInterval, List<Allele>> getQueryIntervalsMap(final File vcf) {

        final Map<String, Integer> contigIndexMap = new HashMap<>();
        final VCFFileReader vcfReader = new VCFFileReader(vcf, false);

        // We want to look at unfiltered SNP sites for which the sample is genotyped as a het
        // with high quality.
        final CompoundFilter compoundFilter = new CompoundFilter(true);
        compoundFilter.add(new SnpFilter());
        compoundFilter.add(new PassingVariantFilter());
        compoundFilter.add(new GenotypeQualityFilter(MINIMUM_GQ, SAMPLE));
        compoundFilter.add(new HeterozygosityFilter(true, SAMPLE));

        final Iterator<VariantContext> hetIterator = new FilteringVariantContextIterator(vcfReader.iterator(), compoundFilter);

        for (final VCFContigHeaderLine vcfContig : vcfReader.getFileHeader().getContigLines()) {
            contigIndexMap.put(vcfContig.getID(), vcfContig.getContigIndex());
        }

        // return a TreeMap since the keys are comparable, and this will use their order in the iteration
        final SortedMap<QueryInterval, List<Allele>> map = new TreeMap<>();

        while (hetIterator.hasNext()) {
            final VariantContext vc = hetIterator.next();
            map.put(new QueryInterval(contigIndexMap.get(vc.getContig()), vc.getStart(), vc.getEnd()), vc.getGenotype(SAMPLE).getAlleles());
        }

        vcfReader.close();

        return map;
    }
 
Example 10
Source File: VcfFormatConverter.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    final ProgressLogger progress = new ProgressLogger(LOG, 10000);
    
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final VCFFileReader reader = new VCFFileReader(INPUT, REQUIRE_INDEX);
    final VCFHeader header = new VCFHeader(reader.getFileHeader());
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available in the input file when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    final VariantContextWriter writer = builder.build();
    writer.writeHeader(header);
    final CloseableIterator<VariantContext> iterator = reader.iterator();

    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        writer.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(reader);
    writer.close();

    return 0;
}
 
Example 11
Source File: FastQualityControlTest.java    From imputationserver with GNU Affero General Public License v3.0 5 votes vote down vote up
public void testCountSamplesInCreatedChunk() throws IOException {

		String configFolder = "test-data/configs/hapmap-chr1";
		String inputFolder = "test-data/data/simulated-chip-1chr-imputation";

		// create workflow context
		WorkflowTestContext context = buildContext(inputFolder, "hapmap2");

		// get output directory
		String out = context.getOutput("chunksDir");

		// create step instance
		FastQualityControlMock qcStats = new FastQualityControlMock(configFolder);

		// run and test
		assertTrue(run(context, qcStats));
		for (File file : new File(out).listFiles()) {
			if (file.getName().endsWith("chunk_1_80000001_100000000.vcf.gz")) {
				VCFFileReader vcfReader = new VCFFileReader(file, false);
				CloseableIterator<VariantContext> it = vcfReader.iterator();
				if (it.hasNext()) {
					VariantContext a = it.next();
					assertEquals(255, a.getNSamples());
				}
				vcfReader.close();
			}

		}

	}
 
Example 12
Source File: FastQualityControlTest.java    From imputationserver with GNU Affero General Public License v3.0 5 votes vote down vote up
public void testCountSitesForOneChunkedContig() throws IOException {

		String configFolder = "test-data/configs/hapmap-chr1";
		String inputFolder = "test-data/data/simulated-chip-1chr-imputation";

		// create workflow context
		WorkflowTestContext context = buildContext(inputFolder, "hapmap2");

		String out = context.getOutput("chunksDir");

		// create step instance
		FastQualityControlMock qcStats = new FastQualityControlMock(configFolder);

		// run and test
		assertTrue(run(context, qcStats));

		File[] files = new File(out).listFiles();
		Arrays.sort(files);

		// baseline from a earlier job execution
		int[] array = { 4588, 968, 3002, 5781, 5116, 4750, 5699, 5174, 6334, 3188, 5106, 5832, 5318 };
		int pos = 0;

		for (File file : files) {
			int count = 0;
			if (file.getName().endsWith(".gz")) {
				VCFFileReader vcfReader = new VCFFileReader(file, false);
				CloseableIterator<VariantContext> it = vcfReader.iterator();
				while (it.hasNext()) {
					it.next();
					count++;
				}
				assertEquals(array[pos], count);
				vcfReader.close();
				pos++;
			}

		}

	}
 
Example 13
Source File: CreateSnpIntervalFromVcf.java    From Drop-seq with MIT License 4 votes vote down vote up
public IntervalList processData(final File vcfFile, final File sdFile, final Set<String> sample, int GQThreshold, final boolean hetSNPsOnly) {

		final VCFFileReader reader = new VCFFileReader(vcfFile, false);
		if (!VCFUtils.GQInHeader(reader)) {
			GQThreshold=-1;
			log.info("Genotype Quality [GQ] not found in header.  Disabling GQ_THRESHOLD parameter");
		}
		
		final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
		SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary();
		Set<String> sampleListFinal = sample;
		if (sample==null || sample.isEmpty()) {
			ArrayList<String> s = reader.getFileHeader().getSampleNamesInOrder();
			sampleListFinal=new TreeSet<String>(s);
		}

		if (sdFile != null)
			sequenceDictionary = getSequenceDictionary(sdFile);

		final ProgressLogger progress = new ProgressLogger(this.log, 500000);

		final SAMFileHeader samHeader = new SAMFileHeader();
		samHeader.setSequenceDictionary(sequenceDictionary);
		IntervalList result = new IntervalList(samHeader);

		// Go through the input, find sites we want to keep.
		final PeekableIterator<VariantContext> iterator = new PeekableIterator<>(reader.iterator());

		validateRequestedSamples (iterator, sampleListFinal);

		while (iterator.hasNext()) {
			final VariantContext site = iterator.next();
			progress.record(site.getContig(), site.getStart());
			// for now drop any filtered site.
			if (site.isFiltered())
				continue;
			// move onto the next record if the site is not a SNP or the samples aren't all heterozygous.
			if (!site.isSNP())
				continue;
			if (!sitePassesFilters(site, sampleListFinal, GQThreshold, hetSNPsOnly))
				continue;
			Interval varInt = new Interval(site.getContig(), site.getStart(),
					site.getEnd(), true, site.getID());

			// final Interval site = findHeterozygousSites(full, SAMPLE);
			result.add(varInt);

		}

		CloserUtil.close(iterator);
		CloserUtil.close(reader);
		return (result);
	}
 
Example 14
Source File: SplitVcfs.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final ProgressLogger progress = new ProgressLogger(log, 10000);

    final VCFFileReader fileReader = new VCFFileReader(INPUT, false);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    final SAMSequenceDictionary sequenceDictionary =
            SEQUENCE_DICTIONARY != null
                    ? SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(SEQUENCE_DICTIONARY).getSequenceDictionary()
                    : fileHeader.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(sequenceDictionary)
            .clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);

    final VariantContextWriter snpWriter = builder.setOutputFile(SNP_OUTPUT).build();
    final VariantContextWriter indelWriter = builder.setOutputFile(INDEL_OUTPUT).build();
    snpWriter.writeHeader(fileHeader);
    indelWriter.writeHeader(fileHeader);

    int incorrectVariantCount = 0;

    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        if (context.isIndel()) indelWriter.add(context);
        else if (context.isSNP()) snpWriter.add(context);
        else {
            if (STRICT) throw new IllegalStateException("Found a record with type " + context.getType().name());
            else incorrectVariantCount++;
        }

        progress.record(context.getContig(), context.getStart());
    }

    if (incorrectVariantCount > 0) {
        log.debug("Found " + incorrectVariantCount + " records that didn't match SNP or INDEL");
    }

    CloserUtil.close(iterator);
    CloserUtil.close(fileReader);
    snpWriter.close();
    indelWriter.close();

    return 0;
}
 
Example 15
Source File: DbSnpBitSetUtil.java    From picard with MIT License 4 votes vote down vote up
/** Private helper method to read through the VCF and create one or more bit sets. */
private static void loadVcf(final File dbSnpFile,
                            final SAMSequenceDictionary sequenceDictionary,
                            final Map<DbSnpBitSetUtil, Set<VariantType>> bitSetsToVariantTypes,
                            final IntervalList intervals,
                            final Optional<Log> log) {

    final Optional<ProgressLogger> progress = log.map(l -> new ProgressLogger(l, (int) 1e5, "Read", "variants"));
    final VCFFileReader variantReader = new VCFFileReader(dbSnpFile, intervals != null);
    final Iterator<VariantContext> variantIterator;
    if (intervals != null) {
        variantIterator = new ByIntervalListVariantContextIterator(variantReader, intervals);
    }
    else {
        variantIterator = variantReader.iterator();
    }

    while (variantIterator.hasNext()) {
        final VariantContext kv = variantIterator.next();

        for (final Map.Entry<DbSnpBitSetUtil, Set<VariantType>> tuple : bitSetsToVariantTypes.entrySet()) {
            final DbSnpBitSetUtil bitset            = tuple.getKey();
            final Set<VariantType> variantsToMatch  = tuple.getValue();

            BitSet bits = bitset.sequenceToBitSet.get(kv.getContig());
            if (bits == null) {
                final int nBits;
                if (sequenceDictionary == null) nBits = kv.getEnd() + 1;
                else nBits = sequenceDictionary.getSequence(kv.getContig()).getSequenceLength() + 1;
                bits = new BitSet(nBits);
                bitset.sequenceToBitSet.put(kv.getContig(), bits);
            }
            if (variantsToMatch.isEmpty() ||
                    (kv.isSNP() && variantsToMatch.contains(VariantType.SNP)) ||
                    (kv.isIndel() && variantsToMatch.contains(VariantType.insertion)) ||
                    (kv.isIndel() && variantsToMatch.contains(VariantType.deletion))) {

                for (int i = kv.getStart(); i <= kv.getEnd(); i++)  bits.set(i, true);
            }
        }
        progress.map(p -> p.record(kv.getContig(), kv.getStart()));
    }

    CloserUtil.close(variantReader);
}
 
Example 16
Source File: MakeSitesOnlyVcf.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final VCFFileReader reader = new VCFFileReader(INPUT, false);
    final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
    final SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary();

    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }

    final ProgressLogger progress = new ProgressLogger(Log.getInstance(MakeSitesOnlyVcf.class), 10000);

    // Setup the site-only file writer
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    final VariantContextWriter writer = builder.build();

    final VCFHeader header = new VCFHeader(inputVcfHeader.getMetaDataInInputOrder(), SAMPLE);
    writer.writeHeader(header);

    // Go through the input, strip the records and write them to the output
    final CloseableIterator<VariantContext> iterator = reader.iterator();
    while (iterator.hasNext()) {
        final VariantContext full = iterator.next();
        final VariantContext site = subsetToSamplesWithOriginalAnnotations(full, SAMPLE);
        writer.add(site);
        progress.record(site.getContig(), site.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(reader);
    writer.close();

    return 0;
}
 
Example 17
Source File: VcfToAdpc.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    final List<File> inputs = IOUtil.unrollFiles(VCF, IOUtil.VCF_EXTENSIONS);
    IOUtil.assertFilesAreReadable(inputs);
    IOUtil.assertFileIsWritable(SAMPLES_FILE);
    IOUtil.assertFileIsWritable(NUM_MARKERS_FILE);
    IOUtil.assertFileIsWritable(OUTPUT);
    final List<String> sampleNames = new ArrayList<>();

    Integer numberOfLoci = null;
    try (IlluminaAdpcFileWriter adpcFileWriter = new IlluminaAdpcFileWriter(OUTPUT)) {
        for (final File inputVcf : inputs) {
            VCFFileReader vcfFileReader = new VCFFileReader(inputVcf, false);
            final VCFHeader header = vcfFileReader.getFileHeader();
            for (int sampleNumber = 0; sampleNumber < header.getNGenotypeSamples(); sampleNumber++) {
                final String sampleName = header.getGenotypeSamples().get(sampleNumber);
                sampleNames.add(sampleName);
                log.info("Processing sample: " + sampleName + " from VCF: " + inputVcf.getAbsolutePath());

                CloseableIterator<VariantContext> variants = vcfFileReader.iterator();
                int lociCount = 0;
                while (variants.hasNext()) {
                    final VariantContext context = variants.next();
                    final float gcScore = getFloatAttribute(context, InfiniumVcfFields.GC_SCORE);

                    final Genotype genotype = context.getGenotype(sampleNumber);
                    final IlluminaGenotype illuminaGenotype = getIlluminaGenotype(genotype, context);

                    final int rawXIntensity = getUnsignedShortAttributeAsInt(genotype, InfiniumVcfFields.X);
                    final int rawYIntensity = getUnsignedShortAttributeAsInt(genotype, InfiniumVcfFields.Y);

                    final Float normalizedXIntensity = getFloatAttribute(genotype, InfiniumVcfFields.NORMX);
                    final Float normalizedYIntensity = getFloatAttribute(genotype, InfiniumVcfFields.NORMY);

                    final IlluminaAdpcFileWriter.Record record = new IlluminaAdpcFileWriter.Record(rawXIntensity, rawYIntensity, normalizedXIntensity, normalizedYIntensity, gcScore, illuminaGenotype);
                    adpcFileWriter.write(record);
                    lociCount++;
                }
                if (lociCount == 0) {
                    throw new PicardException("Found no records in VCF' " + inputVcf.getAbsolutePath() + "'");
                }
                if (numberOfLoci == null) {
                    numberOfLoci = lociCount;
                } else {
                    if (lociCount != numberOfLoci) {
                        throw new PicardException("VCFs have differing number of loci");
                    }
                }
            }
        }
        writeTextToFile(SAMPLES_FILE, StringUtils.join(sampleNames, "\n"));
        writeTextToFile(NUM_MARKERS_FILE, "" + numberOfLoci);
    } catch (Exception e) {
        log.error(e);
        return 1;
    }

    return 0;
}
 
Example 18
Source File: ImputationChrXTest.java    From imputationserver with GNU Affero General Public License v3.0 4 votes vote down vote up
@Test
public void testPipelineChrXWithEaglePhasingOnly() throws IOException, ZipException {
	
	if (!new File(
			"test-data/configs/hapmap-chrX-hg38/ref-panels/ALL.X.nonPAR.phase1_v3.snps_indels_svs.genotypes.all.noSingleton.recode.hg38.bcf")
					.exists()) {
		System.out.println("chrX bcf nonPAR file not available");
		return;
	}


	String configFolder = "test-data/configs/hapmap-chrX";
	String inputFolder = "test-data/data/chrX-unphased";

	// create workflow context
	WorkflowTestContext context = buildContext(inputFolder, "phase1");
	
	context.setInput("mode", "phasing");

	// run qc to create chunkfile
	QcStatisticsMock qcStats = new QcStatisticsMock(configFolder);
	boolean result = run(context, qcStats);

	assertTrue(result);

	// add panel to hdfs
	importRefPanel(FileUtil.path(configFolder, "ref-panels"));
	// importMinimacMap("test-data/B38_MAP_FILE.map");
	importBinaries("files/bin");

	// run imputation
	ImputationMinimac3Mock imputation = new ImputationMinimac3Mock(configFolder);
	result = run(context, imputation);
	assertTrue(result);

	// run export
	CompressionEncryptionMock export = new CompressionEncryptionMock("files");
	result = run(context, export);
	assertTrue(result);

	ZipFile zipFile = new ZipFile("test-data/tmp/local/chr_X.zip", PASSWORD.toCharArray());
	zipFile.extractAll("test-data/tmp");

	VcfFile vcfFile = VcfFileUtil.load("test-data/tmp/chrX.phased.vcf.gz", 100000000, false);
	
	assertEquals(true, vcfFile.isPhased());
	
	VCFFileReader vcfReader = new VCFFileReader(new File(vcfFile.getVcfFilename()), false);
	
	CloseableIterator<VariantContext> it = vcfReader.iterator();

	while (it.hasNext()) {

		VariantContext line = it.next();

		if (line.getStart() == 44322058) {
			assertEquals("A", line.getGenotype("HG00096").getGenotypeString());
			System.out.println(line.getGenotype("HG00097").getGenotypeString());
			assertEquals("A|A", line.getGenotype("HG00097").getGenotypeString());
		}
	}
	
	vcfReader.close();

	FileUtil.deleteDirectory("test-data/tmp");

}
 
Example 19
Source File: ImputationChrXTest.java    From imputationserver with GNU Affero General Public License v3.0 4 votes vote down vote up
@Test
public void testChrXLeaveOneOutPipelinePhased() throws IOException, ZipException {

	// SNP 26963697 from input excluded and imputed!
	// true genotypes:
	// 1,1|1,1|1,1|1,1,1|1,1,1|1,1|1,1,0,1|1,1|0,1,1,1,1,1,1|1,1,1|1,1|1,1|1,1|1,1|1,1|0,

	String configFolder = "test-data/configs/hapmap-chrX";
	String inputFolder = "test-data/data/chrX-phased-loo";

	File file = new File("test-data/tmp");
	if (file.exists()) {
		FileUtil.deleteDirectory(file);
	}

	// create workflow context
	WorkflowTestContext context = buildContext(inputFolder, "phase1");

	// run qc to create chunkfile
	QcStatisticsMock qcStats = new QcStatisticsMock(configFolder);
	boolean result = run(context, qcStats);

	assertTrue(result);

	// add panel to hdfs
	importRefPanel(FileUtil.path(configFolder, "ref-panels"));
	// importMinimacMap("test-data/B38_MAP_FILE.map");
	importBinaries("files/bin");

	// run imputation
	ImputationMinimac3Mock imputation = new ImputationMinimac3Mock(configFolder);
	result = run(context, imputation);
	assertTrue(result);

	// run export
	CompressionEncryptionMock export = new CompressionEncryptionMock("files");
	result = run(context, export);
	assertTrue(result);

	ZipFile zipFile = new ZipFile("test-data/tmp/local/chr_X.zip", PASSWORD.toCharArray());
	zipFile.extractAll("test-data/tmp");

	VcfFile vcfFile = VcfFileUtil.load("test-data/tmp/chrX.dose.vcf.gz", 100000000, false);

	VCFFileReader vcfReader = new VCFFileReader(new File(vcfFile.getVcfFilename()), false);

	CloseableIterator<VariantContext> it = vcfReader.iterator();

	while (it.hasNext()) {

		VariantContext line = it.next();

		if (line.getStart() == 26963697) {
			assertEquals(2, line.getHetCount());
			assertEquals(1, line.getHomRefCount());
			assertEquals(23, line.getHomVarCount());

		}
	}

	vcfReader.close();

	FileUtil.deleteDirectory(file);

}