Java Code Examples for htsjdk.variant.vcf.VCFFileReader#getFileHeader()

The following examples show how to use htsjdk.variant.vcf.VCFFileReader#getFileHeader() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: VcfToVariantTest.java    From genomewarp with Apache License 2.0 6 votes vote down vote up
@Test
/**
 * Note: HTSJDK cannot distinguish between PASS filters and
 * unfiltered for variant calls (it can for general filters).
 * As a result, all variant calls which PASS do not have filter
 * information applied.
 */
public void testGetCalls() throws Exception {
  File vcfFile =
      new File(VcfToVariant.class.getClassLoader().getResource(VALID_VCF_4_1).getFile());
  VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
  VCFHeader vcfHeader = vcfReader.getFileHeader();
  int currVariant = 0;

  for (final VariantContext vc : vcfReader) {
    List<VariantCall> callList = VcfToVariant.getCalls(vc, vcfHeader);
    assertEquals(callList, TRUTH.get(currVariant).getCallsList());
    currVariant++;
  }
}
 
Example 2
Source File: MergePedIntoVcf.java    From picard with MIT License 6 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(PED_FILE);
    IOUtil.assertFileIsReadable(MAP_FILE);
    IOUtil.assertFileIsReadable(ZCALL_THRESHOLDS_FILE);
    IOUtil.assertFileIsWritable(OUTPUT);
    try {
        parseZCallThresholds();
        final ZCallPedFile zCallPedFile = ZCallPedFile.fromFile(PED_FILE, MAP_FILE);
        final VCFFileReader vcfFileReader = new VCFFileReader(ORIGINAL_VCF, false);
        final VCFHeader header = vcfFileReader.getFileHeader();
        if (header.getGenotypeSamples().size() > 1) {
            throw new PicardException("MergePedIntoVCF only works with single-sample VCFs.");
        }
        addAdditionalHeaderFields(header);
        writeVcf(vcfFileReader.iterator(), OUTPUT, vcfFileReader.getFileHeader().getSequenceDictionary(), header, zCallPedFile);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return 0;
}
 
Example 3
Source File: VcfFormatConverter.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    final ProgressLogger progress = new ProgressLogger(LOG, 10000);
    
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final VCFFileReader reader = new VCFFileReader(INPUT, REQUIRE_INDEX);
    final VCFHeader header = new VCFHeader(reader.getFileHeader());
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available in the input file when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    final VariantContextWriter writer = builder.build();
    writer.writeHeader(header);
    final CloseableIterator<VariantContext> iterator = reader.iterator();

    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        writer.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(reader);
    writer.close();

    return 0;
}
 
Example 4
Source File: FixVcfHeaderTest.java    From picard with MIT License 5 votes vote down vote up
private void runFixVcfHeader(final int checkFirstNRecords,
                             final File replacementHeader,
                             final boolean enforceSampleSamples) throws IOException {
    final FixVcfHeader program = new FixVcfHeader();
    final File outputVcf = VcfTestUtils.createTemporaryIndexedFile("output.", ".vcf");

    program.INPUT      = INPUT_VCF;
    program.OUTPUT     = outputVcf;
    if (replacementHeader == null) {
        program.CHECK_FIRST_N_RECORDS = checkFirstNRecords;
    }
    else {
        program.HEADER = replacementHeader;
        program.ENFORCE_SAME_SAMPLES = enforceSampleSamples;
    }

    Assert.assertEquals(program.instanceMain(new String[0]), 0);

    final VCFFileReader actualReader   = new VCFFileReader(OUTPUT_VCF, false);
    final VCFFileReader expectedReader = new VCFFileReader(outputVcf, false);

    // Check that the headers match (order does not matter
    final VCFHeader actualHeader   = actualReader.getFileHeader();
    final VCFHeader expectedHeader = expectedReader.getFileHeader();
    Assert.assertEquals(actualHeader.getFilterLines().size(), expectedHeader.getFilterLines().size());
    Assert.assertEquals(actualHeader.getInfoHeaderLines().size(), expectedHeader.getInfoHeaderLines().size());
    Assert.assertEquals(actualHeader.getFormatHeaderLines().size(), expectedHeader.getFormatHeaderLines().size());

    // Check the number of records match, since we don't touch them
    Assert.assertEquals(actualReader.iterator().stream().count(), expectedReader.iterator().stream().count(), "The wrong number of variants was found.");

    CloserUtil.close(actualReader);
    CloserUtil.close(expectedReader);
}
 
Example 5
Source File: UpdateVcfSequenceDictionary.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath());

    final VCFFileReader fileReader = new VCFFileReader(INPUT, false);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(samSequenceDictionary)
            .clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);

    final VariantContextWriter vcfWriter = builder.setOutputFile(OUTPUT).build();
    fileHeader.setSequenceDictionary(samSequenceDictionary);
    vcfWriter.writeHeader(fileHeader);

    final ProgressLogger progress = new ProgressLogger(log, 10000);
    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        vcfWriter.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(fileReader);
    vcfWriter.close();

    return 0;
}
 
Example 6
Source File: RenameSampleInVcf.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final VCFFileReader in = new VCFFileReader(INPUT, false);
    final VCFHeader header = in.getFileHeader();

    if (header.getGenotypeSamples().size() > 1) {
        throw new IllegalArgumentException("Input VCF must be single-sample.");
    }

    if (OLD_SAMPLE_NAME != null && !OLD_SAMPLE_NAME.equals(header.getGenotypeSamples().get(0))) {
        throw new IllegalArgumentException("Input VCF did not contain expected sample. Contained: " + header.getGenotypeSamples().get(0));
    }

    final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
    if (CREATE_INDEX) options.add(Options.INDEX_ON_THE_FLY); else options.remove(Options.INDEX_ON_THE_FLY);

    final VCFHeader outHeader = new VCFHeader(header.getMetaDataInInputOrder(), CollectionUtil.makeList(NEW_SAMPLE_NAME));
    final VariantContextWriter out = new VariantContextWriterBuilder()
            .setOptions(options)
            .setOutputFile(OUTPUT).setReferenceDictionary(outHeader.getSequenceDictionary()).build();
    out.writeHeader(outHeader);

    for (final VariantContext ctx : in) {
        out.add(ctx);
    }

    out.close();
    in.close();

    return 0;
}
 
Example 7
Source File: SortVcf.java    From picard with MIT License 5 votes vote down vote up
private void collectFileReadersAndHeaders(final List<String> sampleList, SAMSequenceDictionary samSequenceDictionary) {
    for (final File input : INPUT) {
        final VCFFileReader in = new VCFFileReader(input, false);
        final VCFHeader header = in.getFileHeader();
        final SAMSequenceDictionary dict = in.getFileHeader().getSequenceDictionary();
        if (dict == null || dict.isEmpty()) {
            if (null == samSequenceDictionary) {
                throw new IllegalArgumentException("Sequence dictionary was missing or empty for the VCF: " + input.getAbsolutePath() + " Please add a sequence dictionary to this VCF or specify SEQUENCE_DICTIONARY.");
            }
            header.setSequenceDictionary(samSequenceDictionary);
        } else {
            if (null == samSequenceDictionary) {
                samSequenceDictionary = dict;
            } else {
                try {
                    samSequenceDictionary.assertSameDictionary(dict);
                } catch (final AssertionError e) {
                    throw new IllegalArgumentException(e);
                }
            }
        }
        if (sampleList.isEmpty()) {
            sampleList.addAll(header.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(header.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + input.getAbsolutePath() + " has sample names that don't match the other files.");
            }
        }
        inputReaders.add(in);
        inputHeaders.add(header);
    }
}
 
Example 8
Source File: VcfFileSegmentGenerator.java    From picard with MIT License 5 votes vote down vote up
private static List<SAMSequenceRecord> readSequences(final File vcf) {
    final VCFFileReader reader = new VCFFileReader(vcf);
    final VCFHeader header = reader.getFileHeader();
    final SAMSequenceDictionary dict = header.getSequenceDictionary();
    reader.close();
    return dict.getSequences();
}
 
Example 9
Source File: MNVDetectorApplication.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
private static void processVariants(@NotNull final String filePath, @NotNull final String outputVcf, @NotNull final String outputBed,
        boolean strelka) throws IOException {
    final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false);
    final VCFHeader outputHeader =
            strelka ? generateOutputHeader(vcfReader.getFileHeader(), StrelkaPostProcess.TUMOR_GENOTYPE) : vcfReader.getFileHeader();
    final BufferedWriter bedWriter = new BufferedWriter(new FileWriter(outputBed, false));
    final VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVcf)
            .setReferenceDictionary(outputHeader.getSequenceDictionary())
            .build();
    vcfWriter.writeHeader(outputHeader);

    Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair = ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
    for (final VariantContext rawVariant : vcfReader) {
        final VariantContext variant =
                strelka ? StrelkaPostProcess.simplifyVariant(rawVariant, StrelkaPostProcess.TUMOR_GENOTYPE) : rawVariant;

        final PotentialMNVRegion potentialMNVregion = outputPair.getLeft();
        outputPair = MNVDetector.addMnvToRegion(potentialMNVregion, variant);
        outputPair.getRight()
                .ifPresent(mnvRegion -> filterMnvRegion(mnvRegion).ifPresent(filteredRegion -> writeMnvRegionToFiles(filteredRegion,
                        vcfWriter,
                        bedWriter,
                        "\n")));
    }
    filterMnvRegion(outputPair.getLeft()).ifPresent(mnvRegion -> writeMnvRegionToFiles(mnvRegion, vcfWriter, bedWriter, ""));
    vcfWriter.close();
    vcfReader.close();
    bedWriter.close();
    LOGGER.info("Written output variants to {}. Written bed regions to {}.", outputVcf, outputBed);
}
 
Example 10
Source File: CreateSomaticPanelOfNormals.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public Object doWork() {
    final List<File> inputVcfs = new ArrayList<>(vcfs);
    final Collection<CloseableIterator<VariantContext>> iterators = new ArrayList<>(inputVcfs.size());
    final Collection<VCFHeader> headers = new HashSet<>(inputVcfs.size());
    final VCFHeader headerOfFirstVcf = new VCFFileReader(inputVcfs.get(0), false).getFileHeader();
    final SAMSequenceDictionary sequenceDictionary = headerOfFirstVcf.getSequenceDictionary();
    final VariantContextComparator comparator = headerOfFirstVcf.getVCFRecordComparator();


    for (final File vcf : inputVcfs) {
        final VCFFileReader reader = new VCFFileReader(vcf, false);
        iterators.add(reader.iterator());
        final VCFHeader header = reader.getFileHeader();
        Utils.validateArg(comparator.isCompatible(header.getContigLines()), () -> vcf.getAbsolutePath() + " has incompatible contigs.");
        headers.add(header);
    }

    final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(outputVcf, sequenceDictionary, false, Options.INDEX_ON_THE_FLY);
    writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false)));

    final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(comparator, iterators);
    SimpleInterval currentPosition = new SimpleInterval("FAKE", 1, 1);
    final List<VariantContext> variantsAtThisPosition = new ArrayList<>(20);
    while (mergingIterator.hasNext()) {
        final VariantContext vc = mergingIterator.next();
        if (!currentPosition.overlaps(vc)) {
            processVariantsAtSamePosition(variantsAtThisPosition, writer);
            variantsAtThisPosition.clear();
            currentPosition = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getStart());
        }
        variantsAtThisPosition.add(vc);
    }
    mergingIterator.close();
    writer.close();

    return "SUCCESS";
}
 
Example 11
Source File: VcfToVariantTest.java    From genomewarp with Apache License 2.0 5 votes vote down vote up
@Test
public void testGetInfo() throws Exception {
  File vcfFile =
      new File(VcfToVariant.class.getClassLoader().getResource(VALID_VCF_4_1).getFile());
  VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
  VCFHeader vcfHeader = vcfReader.getFileHeader();
  int currVariant = 0;

  for (final VariantContext vc : vcfReader) {
    Map<String, ListValue> info = VcfToVariant.getInfo(vc, vcfHeader);
    assertEquals(info, TRUTH.get(currVariant).getInfo());
    currVariant++;
  }
}
 
Example 12
Source File: VcfToVariant.java    From genomewarp with Apache License 2.0 5 votes vote down vote up
public static List<Variant> convertVcfToVariant(File filepath, boolean checkVersion)
    throws IOException {
  // Check version
  if (checkVersion && !validVersion(filepath)) {
    return null;
  }

  VCFFileReader vcfReader = new VCFFileReader(filepath, false);
  VCFHeader header = vcfReader.getFileHeader();

  return convertContextToVariant(vcfReader, header);
}
 
Example 13
Source File: FastVCFFileReader.java    From imputationserver with GNU Affero General Public License v3.0 5 votes vote down vote up
public FastVCFFileReader(String vcfFilename) throws IOException {

		super(vcfFilename);
		// load header
		VCFFileReader reader = new VCFFileReader(new File(vcfFilename), false);
		VCFHeader header = reader.getFileHeader();
		samples = header.getGenotypeSamples();
		samplesCount = samples.size();
		variantContext = new MinimalVariantContext(samplesCount);
		reader.close();

		parser = new VCFLineParser(samplesCount);

	}
 
Example 14
Source File: VcfToAdpc.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    final List<File> inputs = IOUtil.unrollFiles(VCF, IOUtil.VCF_EXTENSIONS);
    IOUtil.assertFilesAreReadable(inputs);
    IOUtil.assertFileIsWritable(SAMPLES_FILE);
    IOUtil.assertFileIsWritable(NUM_MARKERS_FILE);
    IOUtil.assertFileIsWritable(OUTPUT);
    final List<String> sampleNames = new ArrayList<>();

    Integer numberOfLoci = null;
    try (IlluminaAdpcFileWriter adpcFileWriter = new IlluminaAdpcFileWriter(OUTPUT)) {
        for (final File inputVcf : inputs) {
            VCFFileReader vcfFileReader = new VCFFileReader(inputVcf, false);
            final VCFHeader header = vcfFileReader.getFileHeader();
            for (int sampleNumber = 0; sampleNumber < header.getNGenotypeSamples(); sampleNumber++) {
                final String sampleName = header.getGenotypeSamples().get(sampleNumber);
                sampleNames.add(sampleName);
                log.info("Processing sample: " + sampleName + " from VCF: " + inputVcf.getAbsolutePath());

                CloseableIterator<VariantContext> variants = vcfFileReader.iterator();
                int lociCount = 0;
                while (variants.hasNext()) {
                    final VariantContext context = variants.next();
                    final float gcScore = getFloatAttribute(context, InfiniumVcfFields.GC_SCORE);

                    final Genotype genotype = context.getGenotype(sampleNumber);
                    final IlluminaGenotype illuminaGenotype = getIlluminaGenotype(genotype, context);

                    final int rawXIntensity = getUnsignedShortAttributeAsInt(genotype, InfiniumVcfFields.X);
                    final int rawYIntensity = getUnsignedShortAttributeAsInt(genotype, InfiniumVcfFields.Y);

                    final Float normalizedXIntensity = getFloatAttribute(genotype, InfiniumVcfFields.NORMX);
                    final Float normalizedYIntensity = getFloatAttribute(genotype, InfiniumVcfFields.NORMY);

                    final IlluminaAdpcFileWriter.Record record = new IlluminaAdpcFileWriter.Record(rawXIntensity, rawYIntensity, normalizedXIntensity, normalizedYIntensity, gcScore, illuminaGenotype);
                    adpcFileWriter.write(record);
                    lociCount++;
                }
                if (lociCount == 0) {
                    throw new PicardException("Found no records in VCF' " + inputVcf.getAbsolutePath() + "'");
                }
                if (numberOfLoci == null) {
                    numberOfLoci = lociCount;
                } else {
                    if (lociCount != numberOfLoci) {
                        throw new PicardException("VCFs have differing number of loci");
                    }
                }
            }
        }
        writeTextToFile(SAMPLES_FILE, StringUtils.join(sampleNames, "\n"));
        writeTextToFile(NUM_MARKERS_FILE, "" + numberOfLoci);
    } catch (Exception e) {
        log.error(e);
        return 1;
    }

    return 0;
}
 
Example 15
Source File: MergeVcfs.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    final ProgressLogger progress = new ProgressLogger(log, 10000);
    final List<String> sampleList = new ArrayList<String>();
    INPUT = IOUtil.unrollFiles(INPUT, IOUtil.VCF_EXTENSIONS);
    final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<CloseableIterator<VariantContext>>(INPUT.size());
    final Collection<VCFHeader> headers = new HashSet<VCFHeader>(INPUT.size());
    VariantContextComparator variantContextComparator = null;
    SAMSequenceDictionary sequenceDictionary = null;

    if (SEQUENCE_DICTIONARY != null) {
        sequenceDictionary = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SEQUENCE_DICTIONARY).getFileHeader().getSequenceDictionary();
    }

    for (final File file : INPUT) {
        IOUtil.assertFileIsReadable(file);
        final VCFFileReader fileReader = new VCFFileReader(file, false);
        final VCFHeader fileHeader = fileReader.getFileHeader();
        if (fileHeader.getContigLines().isEmpty()) {
            if (sequenceDictionary == null) {
                throw new IllegalArgumentException(SEQ_DICT_REQUIRED);
            } else {
                fileHeader.setSequenceDictionary(sequenceDictionary);
            }
        }

        if (variantContextComparator == null) {
            variantContextComparator = fileHeader.getVCFRecordComparator();
        } else {
            if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
                throw new IllegalArgumentException(
                        "The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
            }
        }

        if (sequenceDictionary == null) sequenceDictionary = fileHeader.getSequenceDictionary();

        if (sampleList.isEmpty()) {
            sampleList.addAll(fileHeader.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
            }
        }
        
        // add comments in the first header
        if (headers.isEmpty()) {
            COMMENT.stream().forEach(C -> fileHeader.addMetaDataLine(new VCFHeaderLine("MergeVcfs.comment", C)));
        }

        headers.add(fileHeader);
        iteratorCollection.add(fileReader.iterator());
    }

    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException(String.format("Index creation failed. %s", SEQ_DICT_REQUIRED));
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);

    if (CREATE_INDEX) {
        builder.setOption(Options.INDEX_ON_THE_FLY);
    } else {
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    }
    final VariantContextWriter writer = builder.build();

    writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));

    final MergingIterator<VariantContext> mergingIterator = new MergingIterator<VariantContext>(variantContextComparator, iteratorCollection);
    while (mergingIterator.hasNext()) {
        final VariantContext context = mergingIterator.next();
        writer.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(mergingIterator);
    writer.close();
    return 0;
}
 
Example 16
Source File: SplitVcfs.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final ProgressLogger progress = new ProgressLogger(log, 10000);

    final VCFFileReader fileReader = new VCFFileReader(INPUT, false);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    final SAMSequenceDictionary sequenceDictionary =
            SEQUENCE_DICTIONARY != null
                    ? SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(SEQUENCE_DICTIONARY).getSequenceDictionary()
                    : fileHeader.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(sequenceDictionary)
            .clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);

    final VariantContextWriter snpWriter = builder.setOutputFile(SNP_OUTPUT).build();
    final VariantContextWriter indelWriter = builder.setOutputFile(INDEL_OUTPUT).build();
    snpWriter.writeHeader(fileHeader);
    indelWriter.writeHeader(fileHeader);

    int incorrectVariantCount = 0;

    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        if (context.isIndel()) indelWriter.add(context);
        else if (context.isSNP()) snpWriter.add(context);
        else {
            if (STRICT) throw new IllegalStateException("Found a record with type " + context.getType().name());
            else incorrectVariantCount++;
        }

        progress.record(context.getContig(), context.getStart());
    }

    if (incorrectVariantCount > 0) {
        log.debug("Found " + incorrectVariantCount + " records that didn't match SNP or INDEL");
    }

    CloserUtil.close(iterator);
    CloserUtil.close(fileReader);
    snpWriter.close();
    indelWriter.close();

    return 0;
}
 
Example 17
Source File: CollectVariantCallingMetrics.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(DBSNP);
    if (TARGET_INTERVALS != null) IOUtil.assertFileIsReadable(TARGET_INTERVALS);
    if (SEQUENCE_DICTIONARY != null) IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY.toPath());

    final boolean requiresIndex = this.TARGET_INTERVALS != null || this.THREAD_COUNT > 1;
    final VCFFileReader variantReader = new VCFFileReader(INPUT, requiresIndex);
    final VCFHeader vcfHeader = variantReader.getFileHeader();
    CloserUtil.close(variantReader);

    final SAMSequenceDictionary sequenceDictionary =
            SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY == null ? INPUT.toPath() : SEQUENCE_DICTIONARY.toPath());

    final IntervalList targetIntervals = (TARGET_INTERVALS == null) ? null : IntervalList.fromFile(TARGET_INTERVALS).uniqued();

    log.info("Loading dbSNP file ...");
    final DbSnpBitSetUtil.DbSnpBitSets dbsnp = DbSnpBitSetUtil.createSnpAndIndelBitSets(DBSNP, sequenceDictionary, targetIntervals, Optional.of(log));

    log.info("Starting iteration of variants.");

    final VariantProcessor.Builder<CallingMetricAccumulator, CallingMetricAccumulator.Result> builder =
            VariantProcessor.Builder
                    .generatingAccumulatorsBy(() -> {
                        CallingMetricAccumulator accumulator = GVCF_INPUT ? new GvcfMetricAccumulator(dbsnp) : new CallingMetricAccumulator(dbsnp);
                        accumulator.setup(vcfHeader);
                        return accumulator;
                    })
                    .combiningResultsBy(CallingMetricAccumulator.Result::merge)
                    .withInput(INPUT)
                    .multithreadingBy(THREAD_COUNT);

    if (targetIntervals != null) {
        builder.limitingProcessedRegionsTo(targetIntervals);
    }

    final CallingMetricAccumulator.Result result = builder.build().process();

    // Fetch and write the metrics.
    final MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, Integer> detail = getMetricsFile();
    final MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, Integer> summary = getMetricsFile();
    summary.addMetric(result.summary);
    result.details.forEach(detail::addMetric);

    final String outputPrefix = OUTPUT.getAbsolutePath() + ".";
    detail.write(new File(outputPrefix + CollectVariantCallingMetrics.VariantCallingDetailMetrics.getFileExtension()));
    summary.write(new File(outputPrefix + CollectVariantCallingMetrics.VariantCallingSummaryMetrics.getFileExtension()));

    return 0;
}