htsjdk.variant.vcf.VCFUtils Java Examples

The following examples show how to use htsjdk.variant.vcf.VCFUtils. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CreateSomaticPanelOfNormals.java — from gatk-protected (BSD 3-Clause "New" or "Revised" License) — 5 votes
/**
 * Merges the input VCFs in sorted order and hands every group of variants that share a start
 * position to {@code processVariantsAtSamePosition}, writing the result to {@code outputVcf}.
 *
 * <p>The header of the first input supplies the sequence dictionary and the record comparator;
 * every input must have contig lines compatible with that comparator.
 *
 * @return the string {@code "SUCCESS"} once all inputs have been merged and the writer closed
 */
public Object doWork() {
    final List<File> inputVcfs = new ArrayList<>(vcfs);
    final Collection<CloseableIterator<VariantContext>> iterators = new ArrayList<>(inputVcfs.size());
    final Collection<VCFHeader> headers = new HashSet<>(inputVcfs.size());

    // Only the header of the first VCF is needed here, so close this reader immediately
    // instead of leaving it open for the rest of the run.
    final VCFHeader headerOfFirstVcf;
    try (final VCFFileReader firstReader = new VCFFileReader(inputVcfs.get(0), false)) {
        headerOfFirstVcf = firstReader.getFileHeader();
    }
    final SAMSequenceDictionary sequenceDictionary = headerOfFirstVcf.getSequenceDictionary();
    final VariantContextComparator comparator = headerOfFirstVcf.getVCFRecordComparator();

    for (final File vcf : inputVcfs) {
        // NOTE(review): these readers are only released through their iterators when the
        // merging iterator is closed below - confirm that closing the iterator is sufficient.
        final VCFFileReader reader = new VCFFileReader(vcf, false);
        iterators.add(reader.iterator());
        final VCFHeader header = reader.getFileHeader();
        Utils.validateArg(comparator.isCompatible(header.getContigLines()), () -> vcf.getAbsolutePath() + " has incompatible contigs.");
        headers.add(header);
    }

    final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(outputVcf, sequenceDictionary, false, Options.INDEX_ON_THE_FLY);
    writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false)));

    final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(comparator, iterators);
    // Placeholder interval that no real variant overlaps, so the first variant starts a new group.
    SimpleInterval currentPosition = new SimpleInterval("FAKE", 1, 1);
    final List<VariantContext> variantsAtThisPosition = new ArrayList<>(20);
    while (mergingIterator.hasNext()) {
        final VariantContext vc = mergingIterator.next();
        if (!currentPosition.overlaps(vc)) {
            // A new position begins: emit the group collected so far (empty on the first pass).
            processVariantsAtSamePosition(variantsAtThisPosition, writer);
            variantsAtThisPosition.clear();
            currentPosition = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getStart());
        }
        variantsAtThisPosition.add(vc);
    }
    // The loop only flushes a group when the next position is seen, so the final group is still
    // pending here and must be processed explicitly or it would be silently dropped.
    processVariantsAtSamePosition(variantsAtThisPosition, writer);

    mergingIterator.close();
    writer.close();

    return "SUCCESS";
}
 
Example #2
Source File: SortVcf.java — from picard (MIT License) — 5 votes
/**
 * Sorts the records of all {@code INPUT} files into a single output.
 *
 * <p>Steps: check that the inputs (and the optional sequence dictionary file) are readable,
 * load the sequence dictionary if one was given, collect readers and headers, merge the
 * headers, sort the records and write them out.
 *
 * @return {@code 0} after the sorted output has been written
 */
@Override
protected int doWork() {
    for (final File input : INPUT) {
        IOUtil.assertFileIsReadable(input);
    }

    SAMSequenceDictionary samSequenceDictionary = null;
    if (SEQUENCE_DICTIONARY != null) {
        IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
        samSequenceDictionary = SamReaderFactory.makeDefault()
                .referenceSequence(REFERENCE_SEQUENCE)
                .getFileHeader(SEQUENCE_DICTIONARY)
                .getSequenceDictionary();
        // NOTE(review): SEQUENCE_DICTIONARY looks like a File rather than a Closeable, so this
        // call is presumably a no-op - confirm before removing.
        CloserUtil.close(SEQUENCE_DICTIONARY);
    }

    // Collect one reader and one header per input; dictionary compatibility is checked on the way.
    final List<String> sampleList = new ArrayList<>();
    collectFileReadersAndHeaders(sampleList, samSequenceDictionary);

    // Merge the input headers into the header used for the output.
    final VCFHeader outputHeader = new VCFHeader(VCFUtils.smartMergeHeaders(inputHeaders, false), sampleList);

    // Feed every record into the sorting collection, then write the sorted result.
    final SortingCollection<VariantContext> sortedOutput = sortInputs(inputReaders, outputHeader);
    writeSortedOutput(outputHeader, sortedOutput);

    return 0;
}
 
Example #3
Source File: MultiVariantDataSource.java — from gatk (BSD 3-Clause "New" or "Revised" License) — 5 votes
/**
 * Builds the header for the combined data sources: each source's header is first refreshed via
 * {@code getHeaderWithUpdatedSequenceDictionary}, and the refreshed headers are then merged.
 *
 * @return the single refreshed header when there is at most one, otherwise a new header built
 *         from the merged metadata of all of them
 */
private VCFHeader getMergedHeader() {
    final List<VCFHeader> updatedHeaders = featureDataSources
            .stream()
            .map(dataSource -> getHeaderWithUpdatedSequenceDictionary(dataSource))
            .collect(Collectors.toList());

    // Nothing to merge when there is only one header; hand it back as-is.
    if (updatedHeaders.size() <= 1) {
        return updatedHeaders.get(0);
    }

    // htsjdk's merge is very permissive; it is only safe here because the sequence dictionaries
    // have already been cross-validated in validateAllSequenceDictionaries.
    return new VCFHeader(VCFUtils.smartMergeHeaders(updatedHeaders, true));
}
 
Example #4
Source File: MergeVcfs.java — from picard (MIT License) — 4 votes
/**
 * Merges the records of all {@code INPUT} VCFs, in sorted order, into {@code OUTPUT}.
 *
 * <p>Every input must have contig lines compatible with the first input and the same ordered
 * sample list. Inputs without contig lines take the dictionary from {@code SEQUENCE_DICTIONARY}.
 *
 * @return {@code 0} after all records have been written and the writer closed
 * @throws IllegalArgumentException if an input has no contig lines and no sequence dictionary
 *         was supplied, or if an input's contigs or samples do not match the other inputs
 * @throws PicardException if index creation is requested but no sequence dictionary is available
 */
@Override
protected int doWork() {
    final ProgressLogger progress = new ProgressLogger(log, 10000);
    final List<String> sampleList = new ArrayList<String>();
    INPUT = IOUtil.unrollFiles(INPUT, IOUtil.VCF_EXTENSIONS);
    final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<CloseableIterator<VariantContext>>(INPUT.size());
    final Collection<VCFHeader> headers = new HashSet<VCFHeader>(INPUT.size());
    VariantContextComparator variantContextComparator = null;
    SAMSequenceDictionary sequenceDictionary = null;

    if (SEQUENCE_DICTIONARY != null) {
        // Read just the header through the factory instead of open(...), which would hand back
        // a SamReader that nothing in this method ever closes.
        sequenceDictionary = SamReaderFactory.makeDefault()
                .referenceSequence(REFERENCE_SEQUENCE)
                .getFileHeader(SEQUENCE_DICTIONARY)
                .getSequenceDictionary();
    }

    for (final File file : INPUT) {
        IOUtil.assertFileIsReadable(file);
        final VCFFileReader fileReader = new VCFFileReader(file, false);
        final VCFHeader fileHeader = fileReader.getFileHeader();
        if (fileHeader.getContigLines().isEmpty()) {
            // No contigs in this header: fall back to the externally supplied dictionary.
            if (sequenceDictionary == null) {
                throw new IllegalArgumentException(SEQ_DICT_REQUIRED);
            } else {
                fileHeader.setSequenceDictionary(sequenceDictionary);
            }
        }

        // The first input defines the record ordering; all later inputs must be compatible with it.
        if (variantContextComparator == null) {
            variantContextComparator = fileHeader.getVCFRecordComparator();
        } else {
            if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
                throw new IllegalArgumentException(
                        "The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
            }
        }

        if (sequenceDictionary == null) sequenceDictionary = fileHeader.getSequenceDictionary();

        // The first input defines the sample list; all later inputs must match it exactly.
        if (sampleList.isEmpty()) {
            sampleList.addAll(fileHeader.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
            }
        }

        // add comments in the first header
        if (headers.isEmpty()) {
            COMMENT.forEach(comment -> fileHeader.addMetaDataLine(new VCFHeaderLine("MergeVcfs.comment", comment)));
        }

        headers.add(fileHeader);
        iteratorCollection.add(fileReader.iterator());
    }

    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException(String.format("Index creation failed. %s", SEQ_DICT_REQUIRED));
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);

    if (CREATE_INDEX) {
        builder.setOption(Options.INDEX_ON_THE_FLY);
    } else {
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    }
    final VariantContextWriter writer = builder.build();

    writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));

    final MergingIterator<VariantContext> mergingIterator = new MergingIterator<VariantContext>(variantContextComparator, iteratorCollection);
    while (mergingIterator.hasNext()) {
        final VariantContext context = mergingIterator.next();
        writer.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(mergingIterator);
    writer.close();
    return 0;
}