Java Code Examples for htsjdk.variant.variantcontext.VariantContext#isSNP()

The following examples show how to use htsjdk.variant.variantcontext.VariantContext#isSNP(). Each example is drawn from an open-source project; the source file, project, and license are noted above it.
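
Before the project examples, here is a minimal, self-contained sketch of what isSNP() reports. It is not taken from any of the projects below; the class name, source label, contig, and coordinates are purely illustrative.

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;

import java.util.Arrays;

public class IsSnpDemo {
    public static void main(String[] args) {
        // Equal-length, single-base ref and alt alleles: a SNP.
        final VariantContext snp = new VariantContextBuilder(
                "demo", "chr1", 100, 100,
                Arrays.asList(Allele.create("A", true), Allele.create("T", false))).make();

        // Ref allele longer than the alt: a simple deletion, so isSNP() is false.
        final VariantContext del = new VariantContextBuilder(
                "demo", "chr1", 200, 202,
                Arrays.asList(Allele.create("ACT", true), Allele.create("A", false))).make();

        System.out.println(snp.isSNP());   // true
        System.out.println(del.isSNP());   // false
        System.out.println(del.isIndel()); // true
    }
}
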
Example 1
Source File: EvaluateInfoFieldConcordance.java    From gatk with BSD 3-Clause "New" or "Revised" License
private void infoDifference(final VariantContext eval, final VariantContext truth) {
    if(eval.hasAttribute(this.evalInfoKey) && truth.hasAttribute(truthInfoKey)) {
        final double evalVal = Double.valueOf((String) eval.getAttribute(this.evalInfoKey));
        final double truthVal = Double.valueOf((String) truth.getAttribute(this.truthInfoKey));
        final double delta = evalVal - truthVal;
        final double deltaSquared = delta * delta;
        if (eval.isSNP()) {
            this.snpSumDelta += Math.sqrt(deltaSquared);
            this.snpSumDeltaSquared += deltaSquared;
        } else if (eval.isIndel()) {
            this.indelSumDelta += Math.sqrt(deltaSquared);
            this.indelSumDeltaSquared += deltaSquared;
        }
        if (warnBigDifferences && Math.abs(delta) > this.epsilon) {
            this.logger.warn(String.format("Difference (%f) greater than epsilon (%f) at %s:%d %s:", delta, this.epsilon, eval.getContig(), eval.getStart(), eval.getAlleles().toString()));
            this.logger.warn(String.format("\t\tTruth info: " + truth.getAttributes().toString()));
            this.logger.warn(String.format("\t\tEval info: " + eval.getAttributes().toString()));
        }
    }
}
 
Example 2
Source File: SamRecordScoring.java    From hmftools with GNU General Public License v3.0
@NotNull
private static ReadType getReadType(@NotNull final SAMRecord record, @NotNull final VariantContext variant) {
    final Allele alt = variant.getAlternateAllele(0);
    final Allele ref = variant.getReference();
    final int recordIdxOfVariantStart = record.getReadPositionAtReferencePosition(variant.getStart());
    if (recordIdxOfVariantStart == 0) {
        // Variant position was deleted
        return ReadType.MISSING;
    }
    if (variant.isSNP()) {
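        // getReadPositionAtReferencePosition is 1-based, hence the -1 when indexing the 0-based read string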
        if (record.getReadString().charAt(recordIdxOfVariantStart - 1) == alt.getBaseString().charAt(0)) {
            return ReadType.ALT;
        } else {
            return ReadType.REF;
        }
    }
    if (variant.isSimpleInsertion()) {
        return SAMRecords.containsInsert(record, variant.getStart(), alt.getBaseString()) ? ReadType.ALT : ReadType.REF;
    }
    if (variant.isSimpleDeletion()) {
        return SAMRecords.containsDelete(record, variant.getStart(), ref.getBaseString()) ? ReadType.ALT : ReadType.REF;
    }
    return ReadType.OTHER;
}
 
Example 3
Source File: FilterVariantTranches.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) {
    final VariantContextBuilder builder = new VariantContextBuilder(variant);

    if (removeOldFilters) {
        builder.unfiltered();
    }

    if (variant.hasAttribute(infoKey)) {
        final double score = Double.parseDouble((String) variant.getAttribute(infoKey));
        if (variant.isSNP() && snpCutoffs.size() != 0 && isTrancheFiltered(score, snpCutoffs)) {
            builder.filter(filterStringFromScore(SNPString, score, snpTranches, snpCutoffs));
            filteredSnps++;
        } else if (variant.isIndel() && indelCutoffs.size() != 0 && isTrancheFiltered(score, indelCutoffs)) {
            builder.filter(filterStringFromScore(INDELString, score, indelTranches, indelCutoffs));
            filteredIndels++;
        }
    }

    if (builder.getFilters() == null || builder.getFilters().size() == 0){
        builder.passFilters();
    }
    
    vcfWriter.add(builder.make());
}
 
Example 4
Source File: TiTvVariantEvaluator.java    From gatk with BSD 3-Clause "New" or "Revised" License
public void updateTiTv(VariantContext vc, boolean updateStandard) {
    if (vc != null && vc.isSNP() && vc.isBiallelic() && vc.isPolymorphicInSamples()) {
        if ( GATKVariantContextUtils.isTransition(vc)) {
            if (updateStandard) nTiInComp++;
            else nTi++;
        } else {                                
            if (updateStandard) nTvInComp++;
            else nTv++;
        }

        if (vc.hasAttribute("ANCESTRALALLELE")) {
            final String aaStr = vc.getAttributeAsString("ANCESTRALALLELE", "null").toUpperCase();
            if ( ! aaStr.equals(".") ) {
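                // classify the substitution from the ancestral base to the first alt base as a transition or transversion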
                switch ( BaseUtils.SNPSubstitutionType(aaStr.getBytes()[0], vc.getAlternateAllele(0).getBases()[0] ) ) {
                    case TRANSITION: nTiDerived++; break;
                    case TRANSVERSION: nTvDerived++; break;
                    default: break;
                }
            }
        }
    }
}
 
Example 5
Source File: SomaticRefContextEnrichment.java    From hmftools with GNU General Public License v3.0
@NotNull
private Optional<RepeatContext> getRepeatContext(@NotNull final VariantContext variant, int relativePosition,
        @NotNull final String sequence) {
    if (variant.isIndel()) {
        return RepeatContextFactory.repeats(relativePosition + 1, sequence);
    } else if (variant.isSNP() || variant.isMNP()) {
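        // for substitutions, look for repeats immediately before and after the changed bases and keep the larger context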
        int altLength = variant.getAlternateAllele(0).getBaseString().length();

        Optional<RepeatContext> priorRepeat = RepeatContextFactory.repeats(relativePosition - 1, sequence);
        Optional<RepeatContext> postRepeat = RepeatContextFactory.repeats(relativePosition + altLength, sequence);
        return max(priorRepeat, postRepeat);
    } else {
        return Optional.empty();
    }
}
 
Example 6
Source File: GetPileupSummaries.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
public void apply(AlignmentContext alignmentContext, ReferenceContext referenceContext, FeatureContext featureContext) {
    final List<VariantContext> vcs = featureContext.getValues(variants);
    if (vcs.isEmpty()) {
        return;
    }
    final VariantContext vc = vcs.get(0);

    if ( vc.isBiallelic() && vc.isSNP() && alleleFrequencyInRange(vc) ) {
        final ReadPileup pileup = alignmentContext.getBasePileup()
                .makeFilteredPileup(pe -> pe.getRead().getMappingQuality() >= minMappingQuality);
        pileupSummaries.add(new PileupSummary(vc, pileup));
    }
}
 
Example 7
Source File: DbSnpBitSetUtil.java    From gatk with BSD 3-Clause "New" or "Revised" License
/** Private helper method to read through the VCF and create one or more bit sets. */
private static void loadVcf(final File dbSnpFile,
                            final SAMSequenceDictionary sequenceDictionary,
                            final Map<DbSnpBitSetUtil, Set<DbSnpVariantType>> bitSetsToVariantTypes) {

    final VCFFileReader variantReader = new VCFFileReader(dbSnpFile);
    final CloseableIterator<VariantContext> variantIterator = variantReader.iterator();

    while (variantIterator.hasNext()) {
        final VariantContext kv = variantIterator.next();

        for (final Map.Entry<DbSnpBitSetUtil, Set<DbSnpVariantType>> tuple : bitSetsToVariantTypes.entrySet()) {
            final DbSnpBitSetUtil bitset            = tuple.getKey();
            final Set<DbSnpVariantType> variantsToMatch  = tuple.getValue();

            BitSet bits = bitset.sequenceToBitSet.get(kv.getContig());
            if (bits == null) {
                final int nBits;
                if (sequenceDictionary == null) nBits = kv.getEnd() + 1;
                else nBits = sequenceDictionary.getSequence(kv.getContig()).getSequenceLength() + 1;
                bits = new BitSet(nBits);
                bitset.sequenceToBitSet.put(kv.getContig(), bits);
            }
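            // NB: isIndel() is true for both insertions and deletions, so any indel record matches either requested indel type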
            if (variantsToMatch.isEmpty() ||
                    (kv.isSNP() && variantsToMatch.contains(DbSnpVariantType.SNP)) ||
                    (kv.isIndel() && variantsToMatch.contains(DbSnpVariantType.insertion)) ||
                    (kv.isIndel() && variantsToMatch.contains(DbSnpVariantType.deletion))) {

                for (int i = kv.getStart(); i <= kv.getEnd(); i++)  bits.set(i, true);
            }
        }
    }

    CloserUtil.close(variantIterator);
    CloserUtil.close(variantReader);
}
 
Example 8
Source File: EventMap.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Create a block substitution out of two variant contexts that start at the same position
 *
 * vc1 can be a SNP, and vc2 can then be either an insertion or a deletion.
 * If vc1 is an indel, then vc2 must be the opposite type (vc1 deletion => vc2 must be an insertion)
 *
 * @param vc1 the first variant context we want to merge
 * @param vc2 the second
 * @return a block substitution that represents the composite substitution implied by vc1 and vc2
 */
protected VariantContext makeBlock(final VariantContext vc1, final VariantContext vc2) {
    Utils.validateArg( vc1.getStart() == vc2.getStart(), () -> "vc1 and 2 must have the same start but got " + vc1 + " and " + vc2);
    Utils.validateArg( vc1.isBiallelic(), "vc1 must be biallelic");
    if ( ! vc1.isSNP() ) {
        Utils.validateArg ( (vc1.isSimpleDeletion() && vc2.isSimpleInsertion()) || (vc1.isSimpleInsertion() && vc2.isSimpleDeletion()),
                () -> "Can only merge single insertion with deletion (or vice versa) but got " + vc1 + " merging with " + vc2);
    } else {
        Utils.validateArg(!vc2.isSNP(), () -> "vc1 is " + vc1 + " but vc2 is a SNP, which implies there's been some terrible bug in the cigar " + vc2);
    }

    final Allele ref, alt;
    final VariantContextBuilder b = new VariantContextBuilder(vc1);
    if ( vc1.isSNP() ) {
        // we have to repair the first base, so SNP case is special cased
        if ( vc1.getReference().equals(vc2.getReference()) ) {
            // we've got an insertion, so we just update the alt to have the prev alt
            ref = vc1.getReference();
            alt = Allele.create(vc1.getAlternateAllele(0).getDisplayString() + vc2.getAlternateAllele(0).getDisplayString().substring(1), false);
        } else {
            // we're dealing with a deletion, so we patch the ref
            ref = vc2.getReference();
            alt = vc1.getAlternateAllele(0);
            b.stop(vc2.getEnd());
        }
    } else {
        final VariantContext insertion = vc1.isSimpleInsertion() ? vc1 : vc2;
        final VariantContext deletion  = vc1.isSimpleInsertion() ? vc2 : vc1;
        ref = deletion.getReference();
        alt = insertion.getAlternateAllele(0);
        b.stop(deletion.getEnd());
    }

    return b.alleles(Arrays.asList(ref, alt)).make();
}
 
Example 9
Source File: MultiallelicSummary.java    From gatk with BSD 3-Clause "New" or "Revised" License
private void calculatePairwiseTiTv(VariantContext vc) {
    if (!vc.isSNP())
        return;

    for ( Allele alt : vc.getAlternateAlleles() ) {
        if (BaseUtils.SNPSubstitutionType(vc.getReference().getBases()[0], alt.getBases()[0]) == BaseUtils.BaseSubstitutionType.TRANSITION)
            nTi++;
        else
            nTv++;
    }
}
 
Example 10
Source File: LiftoverVcf.java    From picard with MIT License
/**
 * Utility function to attempt to add a variant. Checks that the reference allele still matches the reference (which may have changed).
 *
 * @param vc new {@link VariantContext}
 * @param refSeq {@link ReferenceSequence} of new reference
 * @param source the original {@link VariantContext} to use for putting the original location information into vc
 */
private void tryToAddVariant(final VariantContext vc, final ReferenceSequence refSeq, final VariantContext source) {
    if (!refSeq.getName().equals(vc.getContig())) {
        throw new IllegalStateException("The contig of the VariantContext, " + vc.getContig() + ", doesnt match the ReferenceSequence: " + refSeq.getName());
    }

    // Check that the reference allele still agrees with the reference sequence
    final boolean mismatchesReference;
    final Allele allele = vc.getReference();
    final byte[] ref = refSeq.getBases();
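    // vc coordinates are 1-based while the bases array is 0-based, hence the -1 offset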
    final String refString = StringUtil.bytesToString(ref, vc.getStart() - 1, vc.getEnd() - vc.getStart() + 1);

    if (!refString.equalsIgnoreCase(allele.getBaseString())) {
        // consider that the ref and the alt may have been swapped in a simple biallelic SNP
        if (vc.isBiallelic() && vc.isSNP() && refString.equalsIgnoreCase(vc.getAlternateAllele(0).getBaseString())) {
            totalTrackedAsSwapRefAlt++;
            if (RECOVER_SWAPPED_REF_ALT) {
                addAndTrack(LiftoverUtils.swapRefAlt(vc, TAGS_TO_REVERSE, TAGS_TO_DROP), source);
                return;
            }
        }
        mismatchesReference = true;
    } else {
        mismatchesReference = false;
    }


    if (mismatchesReference) {
        rejectedRecords.add(new VariantContextBuilder(source)
                .filter(FILTER_MISMATCHING_REF_ALLELE)
                .attribute(ATTEMPTED_LOCUS, String.format("%s:%d-%d", vc.getContig(), vc.getStart(), vc.getEnd()))
                .attribute(ATTEMPTED_ALLELES, vc.getReference().toString() + "->" + String.join(",", vc.getAlternateAlleles().stream().map(Allele::toString).collect(Collectors.toList())))
                .make());
        failedAlleleCheck++;
        trackLiftedVariantContig(rejectsByContig, source.getContig());
    } else {
        addAndTrack(vc, source);
    }
}
 
Example 11
Source File: VariantRecalibrator.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * add a datum representing a variant site (or allele) to the data in {@code variants}, which represents the callset to be recalibrated
 * @param variants is modified by having a new VariantDatum added to it
 */
private void addDatum(
        final ArrayList<VariantDatum> variants,
        final boolean isInput,
        final FeatureContext featureContext,
        final VariantContext vc,
        final Allele refAllele,
        final Allele altAllele) {
    final VariantDatum datum = new VariantDatum();

    // Populate the datum with fields from the VariantContext. Unfortunately the full VC is too
    // big to keep around, so we pull in only the things we absolutely need.
    datum.referenceAllele = refAllele;
    datum.alternateAllele = altAllele;
    dataManager.decodeAnnotations(datum, vc, true);

    // non-deterministic because order of calls depends on load of machine
    datum.loc = (isInput ? new SimpleInterval(vc) : null);

    datum.originalQual = vc.getPhredScaledQual();
    datum.isSNP = vc.isSNP() && vc.isBiallelic();
    datum.isTransition = datum.isSNP && GATKVariantContextUtils.isTransition(vc);
    datum.isAggregate = !isInput;

    // Loop through the training data sets and if they overlap this locus (and allele, if applicable) then update
    // the prior and training status appropriately. The locus used to find training set variants is retrieved
    // by parseTrainingSets from the FeatureContext argument.
    dataManager.parseTrainingSets(featureContext, vc, datum, TRUST_ALL_POLYMORPHIC);
    final double priorFactor = QualityUtils.qualToProb(datum.prior);
    datum.prior = Math.log10(priorFactor) - Math.log10(1.0 - priorFactor);

    variants.add(datum);
}
 
Example 12
Source File: VariantAFEvaluator.java    From gatk with BSD 3-Clause "New" or "Revised" License
public void update1(VariantContext vc, final ReferenceContext referenceContext, final ReadsContext readsContext, final FeatureContext featureContext) {

    if (vc == null || !vc.isSNP() || (getWalker().ignoreAC0Sites() && vc.isMonomorphicInSamples())) {
        return;
    }

    for (final Genotype genotype : vc.getGenotypes()) {
         // eval array
        if (!genotype.isNoCall()) {
            if (genotype.getPloidy() != PLOIDY) {

                throw new UserException.BadInput("This tool only works with ploidy 2");
            }
            // add AF at this site
            this.totalCalledSites += 1;
            int numReferenceAlleles = genotype.countAllele(vc.getReference());
            // explicit cast avoids integer division if PLOIDY is declared as an int
            double varAFHere = (PLOIDY - numReferenceAlleles) / (double) PLOIDY;
            this.sumVariantAFs += varAFHere;

            totalHetSites += numReferenceAlleles == 1 ? 1 : 0;
            totalHomVarSites += numReferenceAlleles == 0 ? 1 : 0;
            totalHomRefSites += numReferenceAlleles == 2 ? 1 : 0;

        }
    }

    if (!vc.hasGenotypes()) {
        // comp  ( sites only thousand genomes )
        this.totalCalledSites += 1;
        this.sumVariantAFs += vc.getAttributeAsDouble("AF", 0.0);
    }
}
 
Example 13
Source File: StrelkaPostProcess.java    From hmftools with GNU General Public License v3.0
@VisibleForTesting
static int qualityScore(@NotNull final VariantContext variant) {
    if (variant.isSNP()) {
        return getIntField(variant, SNP_QUAL_FIELD);
    } else if (variant.isIndel()) {
        return getIntField(variant, INDEL_QUAL_FIELD);
    } else {
        throw new IllegalStateException("record is not indel or snp: " + variant);
    }
}
 
Example 14
Source File: StrelkaAllelicDepth.java    From hmftools with GNU General Public License v3.0
@NotNull
private static Function<Allele, String> alleleKey(@NotNull final VariantContext variant) {
    if (variant.isSNP()) {
        return StrelkaAllelicDepth::snpAlleleKey;
    } else if (variant.isIndel()) {
        return StrelkaAllelicDepth::indelAlleleKey;
    } else {
        throw new IllegalStateException("record is neither indel nor snp: " + variant);
    }
}
 
Example 15
Source File: SplitVcfs.java    From picard with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final ProgressLogger progress = new ProgressLogger(log, 10000);

    final VCFFileReader fileReader = new VCFFileReader(INPUT, false);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    final SAMSequenceDictionary sequenceDictionary =
            SEQUENCE_DICTIONARY != null
                    ? SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(SEQUENCE_DICTIONARY).getSequenceDictionary()
                    : fileHeader.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(sequenceDictionary)
            .clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);

    final VariantContextWriter snpWriter = builder.setOutputFile(SNP_OUTPUT).build();
    final VariantContextWriter indelWriter = builder.setOutputFile(INDEL_OUTPUT).build();
    snpWriter.writeHeader(fileHeader);
    indelWriter.writeHeader(fileHeader);

    int incorrectVariantCount = 0;

    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        if (context.isIndel()) indelWriter.add(context);
        else if (context.isSNP()) snpWriter.add(context);
        else {
            if (STRICT) throw new IllegalStateException("Found a record with type " + context.getType().name());
            else incorrectVariantCount++;
        }

        progress.record(context.getContig(), context.getStart());
    }

    if (incorrectVariantCount > 0) {
        log.debug("Found " + incorrectVariantCount + " records that didn't match SNP or INDEL");
    }

    CloserUtil.close(iterator);
    CloserUtil.close(fileReader);
    snpWriter.close();
    indelWriter.close();

    return 0;
}
 
Example 16
Source File: FastaAlternateReferenceMaker.java    From gatk with BSD 3-Clause "New" or "Revised" License
private byte[] handlePosition(SimpleInterval interval, byte base, FeatureContext features) {
    if (deletionBasesRemaining > 0) {
        deletionBasesRemaining--;
        return NO_BASES;
    }

    // If we have a mask at this site, use it
    if ( snpmaskPriority ){
        if (isMasked(features) )
            return N_BYTES;
    }

    // Check to see if we have a called snp
    for ( final VariantContext vc : features.getValues(variants) ) {
        if ( vc.isFiltered() || vc.getStart() != interval.getStart()  )
            continue;

        if ( vc.isSimpleDeletion()) {
            deletionBasesRemaining = vc.getReference().length() - 1;
            // delete the next n bases, not this one
            return baseToByteArray(base);
        } else if ( vc.isSimpleInsertion() || vc.isSNP() ) {
            // Get the first alt allele that is not a spanning deletion. If none present, use the empty allele
            final Optional<Allele> optionalAllele = getFirstConcreteAltAllele(vc.getAlternateAlleles());
            final Allele allele = optionalAllele.orElseGet(() -> Allele.create(EMPTY_BASE, false));
            if ( vc.isSimpleInsertion() ) {
                return allele.getBases();
            } else {
                final String iupacBase = (iupacSample != null) ? getIUPACBase(vc.getGenotype(iupacSample)) : allele.toString();
                return iupacBase.getBytes();
            }
        }
    }

    if ( !snpmaskPriority ){
        if ( isMasked(features)) {
            return N_BYTES;
        }
    }

    // if we got here then we're just ref
    return baseToByteArray(base);
}
 
Example 17
Source File: HaplotypeMap.java    From picard with MIT License
/**
 * Constructs a HaplotypeMap from the provided file.
 */

private void fromVcf(final File file) {
    try ( final VCFFileReader reader = new VCFFileReader(file, false)) {

        final SAMSequenceDictionary dict = reader.getFileHeader().getSequenceDictionary();

        if (dict == null || dict.getSequences().isEmpty()) {
            throw new IllegalStateException("Haplotype map VCF file must contain header: " + file.getAbsolutePath());
        }

        initialize(new SAMFileHeader(dict));

        final Map<String, HaplotypeBlock> anchorToHaplotype = new HashMap<>();

        for (final VariantContext vc : reader) {

            if (vc.getNSamples() > 1) {
                throw new IllegalStateException("Haplotype map VCF file must contain at most one sample: " + file.getAbsolutePath());
            }

            final Genotype gc = vc.getGenotype(0); // may be null
            final boolean hasGc = gc != null;

            if (vc.getAlternateAlleles().size() != 1) {
                throw new IllegalStateException("Haplotype map VCF file must contain exactly one alternate allele per site: " + vc.toString());
            }

            if (!vc.isSNP()) {
                throw new IllegalStateException("Haplotype map VCF file must contain only SNPs: " + vc.toString());
            }

            if (!vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)) {
                throw new IllegalStateException("Haplotype map VCF Variants must have an '"+ VCFConstants.ALLELE_FREQUENCY_KEY + "' INFO field: " + vc.toString());
            }


            if (hasGc && gc.isPhased() && !gc.hasExtendedAttribute(VCFConstants.PHASE_SET_KEY)) {
                throw new IllegalStateException("Haplotype map VCF Variants' genotypes that are phased must have a PhaseSet (" + VCFConstants.PHASE_SET_KEY+")" + vc.toString());
            }

            if (hasGc && gc.isPhased() && !gc.isHet()) {
                throw new IllegalStateException("Haplotype map VCF Variants' genotypes that are phased must be HET" + vc.toString());
            }

            // Then parse them out
            final String chrom = vc.getContig();
            final int pos = vc.getStart();
            final String name = vc.getID();

            final byte ref = vc.getReference().getBases()[0];
            final byte var = vc.getAlternateAllele(0).getBases()[0];

            final double temp_maf = vc.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 0D);
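            // 'swapped' is true when a phased genotype lists the alternate allele first; major/minor and the MAF flip below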
            final boolean swapped = hasGc && !gc.getAllele(0).equals(vc.getReference());

            final byte major, minor;
            final double maf;

            if (swapped) {
                major = var;
                minor = ref;
                maf = 1 - temp_maf;
            } else {
                major = ref;
                minor = var;
                maf = temp_maf;
            }

            final String anchor = anchorFromVc(vc);

            // If it's the anchor snp, start the haplotype
            if (!anchorToHaplotype.containsKey(anchor)) {
                final HaplotypeBlock newBlock = new HaplotypeBlock(maf);
                anchorToHaplotype.put(anchor, newBlock);
            }
            final HaplotypeBlock block = anchorToHaplotype.get(anchor);
            block.addSnp(new Snp(name, chrom, pos, major, minor, maf, null));
        }

        // And add them all now that they are all ready.
        fromHaplotypes(anchorToHaplotype.values());
    }
}
 
Example 18
Source File: DbSnpBitSetUtil.java    From picard with MIT License
/** Private helper method to read through the VCF and create one or more bit sets. */
private static void loadVcf(final File dbSnpFile,
                            final SAMSequenceDictionary sequenceDictionary,
                            final Map<DbSnpBitSetUtil, Set<VariantType>> bitSetsToVariantTypes,
                            final IntervalList intervals,
                            final Optional<Log> log) {

    final Optional<ProgressLogger> progress = log.map(l -> new ProgressLogger(l, (int) 1e5, "Read", "variants"));
    final VCFFileReader variantReader = new VCFFileReader(dbSnpFile, intervals != null);
    final Iterator<VariantContext> variantIterator;
    if (intervals != null) {
        variantIterator = new ByIntervalListVariantContextIterator(variantReader, intervals);
    }
    else {
        variantIterator = variantReader.iterator();
    }

    while (variantIterator.hasNext()) {
        final VariantContext kv = variantIterator.next();

        for (final Map.Entry<DbSnpBitSetUtil, Set<VariantType>> tuple : bitSetsToVariantTypes.entrySet()) {
            final DbSnpBitSetUtil bitset            = tuple.getKey();
            final Set<VariantType> variantsToMatch  = tuple.getValue();

            BitSet bits = bitset.sequenceToBitSet.get(kv.getContig());
            if (bits == null) {
                final int nBits;
                if (sequenceDictionary == null) nBits = kv.getEnd() + 1;
                else nBits = sequenceDictionary.getSequence(kv.getContig()).getSequenceLength() + 1;
                bits = new BitSet(nBits);
                bitset.sequenceToBitSet.put(kv.getContig(), bits);
            }
            if (variantsToMatch.isEmpty() ||
                    (kv.isSNP() && variantsToMatch.contains(VariantType.SNP)) ||
                    (kv.isIndel() && variantsToMatch.contains(VariantType.insertion)) ||
                    (kv.isIndel() && variantsToMatch.contains(VariantType.deletion))) {

                for (int i = kv.getStart(); i <= kv.getEnd(); i++)  bits.set(i, true);
            }
        }
        progress.map(p -> p.record(kv.getContig(), kv.getStart()));
    }

    CloserUtil.close(variantReader);
}
 
Example 19
Source File: AssemblyRegionTrimmer.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
/**
 * Returns a trimming result object from which the trimmed variant region and flanking non-variant sections
 * can be recovered later.
 *
 * @param originalRegion the genome location range to trim.
 * @param allVariantsWithinExtendedRegion list of variants contained in the trimming location. Variants therein
 *                                        not overlapping with {@code originalRegion} are simply ignored.
 * @return never {@code null}.
 */
public Result trim(final AssemblyRegion originalRegion,
                   final SortedSet<VariantContext> allVariantsWithinExtendedRegion) {


    if ( allVariantsWithinExtendedRegion.isEmpty() ) { // no variants
        return Result.noVariation(emitReferenceConfidence, originalRegion, assemblyRegionTrimmerArgs.snpPadding, usableExtension);
    }

    final List<VariantContext> withinActiveRegion = new LinkedList<>();
    final SimpleInterval originalRegionRange = originalRegion.getSpan();
    boolean foundNonSnp = false;
    SimpleInterval variantSpan = null;
    for ( final VariantContext vc : allVariantsWithinExtendedRegion ) {
        final SimpleInterval vcLoc = new SimpleInterval(vc);
        if ( originalRegionRange.overlaps(vcLoc) ) {
            foundNonSnp = foundNonSnp || ! vc.isSNP();
            variantSpan = variantSpan == null ? vcLoc : variantSpan.spanWith(vcLoc);
            withinActiveRegion.add(vc);
        }
    }
    final int padding = foundNonSnp ? assemblyRegionTrimmerArgs.indelPadding : assemblyRegionTrimmerArgs.snpPadding;

    // we don't actually have anything in the region after skipping out variants that don't overlap
    // the region's full location
    if ( variantSpan == null ) {
        return Result.noVariation(emitReferenceConfidence, originalRegion, padding, usableExtension);
    }

    if ( assemblyRegionTrimmerArgs.dontTrimActiveRegions) {
        return Result.noTrimming(emitReferenceConfidence, originalRegion, padding, usableExtension, withinActiveRegion);
    }

    final SimpleInterval maximumSpan = originalRegionRange.expandWithinContig(usableExtension, sequenceDictionary);
    final SimpleInterval idealSpan = variantSpan.expandWithinContig(padding, sequenceDictionary);
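    // clamp the padded ideal span to the maximum usable span, then merge the variant span back in so it stays fully covered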
    final SimpleInterval finalSpan = maximumSpan.intersect(idealSpan).mergeWithContiguous(variantSpan);

    // Make doubly sure that, if we are emitting a GVCF, we won't call non-variable positions beyond the target active region span.
    // In regular calling mode this doesn't matter; the conditional preserves the existing behavior.
    final SimpleInterval callableSpan = emitReferenceConfidence ? variantSpan.intersect(originalRegionRange) : variantSpan;

    final Pair<SimpleInterval, SimpleInterval> nonVariantRegions = nonVariantTargetRegions(originalRegion, callableSpan);

    if ( debug ) {
        logger.info("events       : " + withinActiveRegion);
        logger.info("region       : " + originalRegion);
        logger.info("callableSpan : " + callableSpan);
        logger.info("padding      : " + padding);
        logger.info("idealSpan    : " + idealSpan);
        logger.info("maximumSpan  : " + maximumSpan);
        logger.info("finalSpan    : " + finalSpan);
    }

    return new Result(emitReferenceConfidence, true, originalRegion, padding, usableExtension, withinActiveRegion, nonVariantRegions, finalSpan, idealSpan, maximumSpan, variantSpan);
}
 
Example 20
Source File: ThetaVariantEvaluator.java    From gatk with BSD 3-Clause "New" or "Revised" License
public void update1(VariantContext vc, final ReferenceContext referenceContext, final ReadsContext readsContext, final FeatureContext featureContext) {
    if (vc == null || !vc.isSNP() || (getWalker().ignoreAC0Sites() && vc.isMonomorphicInSamples())) {
        return;
    }

    //this maps allele to a count
    ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();

    int numHetsHere = 0;
    int numGenosHere = 0;
    int numIndsHere = 0;

    for (final Genotype genotype : vc.getGenotypes()) {
        numIndsHere++;
        if (!genotype.isNoCall()) {
            //increment stats for heterozygosity
            if (genotype.isHet()) {
                numHetsHere++;
            }

            numGenosHere++;
            //increment stats for pairwise mismatches

            for (Allele allele : genotype.getAlleles()) {
                if (allele.isCalled()) {
                    String alleleString = allele.toString();
                    alleleCounts.putIfAbsent(alleleString, 0);
                    alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
                }
            }
        }
    }
    if (numGenosHere > 0) {
        //only if have one called genotype at least
        this.numSites++;

        this.totalHet += numHetsHere / (double)numGenosHere;

        //compute based on num sites
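        // Watterson-style correction: harmonicFactor is a_n = sum_{i=1}^{n} 1/i over the individuals called at this site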
        float harmonicFactor = 0;
        for (int i = 1; i <= numIndsHere; i++) {
            harmonicFactor += 1.0 / i;
        }
        this.thetaRegionNumSites += 1.0 / harmonicFactor;

        //now compute pairwise mismatches
        float numPairwise = 0;
        int numDiffs = 0;
        for (String allele1 : alleleCounts.keySet()) {
            int allele1Count = alleleCounts.get(allele1);

            for (String allele2 : alleleCounts.keySet()) {
                if (allele1.compareTo(allele2) < 0) {
                    continue;
                }
                if (allele1.compareTo(allele2) == 0) {
                    numPairwise += allele1Count * (allele1Count - 1) * .5;

                }
                else {
                    int allele2Count = alleleCounts.get(allele2);
                    numPairwise += allele1Count * allele2Count;
                    numDiffs += allele1Count * allele2Count;
                }
            }
        }

        if (numPairwise > 0) {
            this.totalAvgDiffs += numDiffs / numPairwise;
        }
    }
}