Java Code Examples for htsjdk.variant.variantcontext.VariantContext#getReference()

The following examples show how to use htsjdk.variant.variantcontext.VariantContext#getReference(). Each example is drawn from an open-source project; the source file and license are noted above the code.
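Before the project examples, here is a minimal, self-contained sketch of the method in isolation (the file name example.vcf is a placeholder): it opens a VCF with htsjdk's VCFFileReader and prints each record's reference allele.

import java.io.File;

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFFileReader;

public class PrintReferenceAlleles {
    public static void main(final String[] args) {
        // requireIndex = false, so a plain unindexed VCF is accepted
        try (final VCFFileReader reader = new VCFFileReader(new File("example.vcf"), false)) {
            for (final VariantContext vc : reader) {
                final Allele ref = vc.getReference();  // the reference allele at this site
                System.out.println(vc.getContig() + ":" + vc.getStart() + " ref=" + ref.getBaseString());
            }
        }
    }
}
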
Example 1
Source File: SamRecordScoring.java    From hmftools with GNU General Public License v3.0
@NotNull
private static ReadType getReadType(@NotNull final SAMRecord record, @NotNull final VariantContext variant) {
    final Allele alt = variant.getAlternateAllele(0);
    final Allele ref = variant.getReference();
    final int recordIdxOfVariantStart = record.getReadPositionAtReferencePosition(variant.getStart());
    if (recordIdxOfVariantStart == 0) {
        // Variant position was deleted
        return ReadType.MISSING;
    }
    if (variant.isSNP()) {
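        // getReadPositionAtReferencePosition is 1-based, so subtract 1 when indexing into the read string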
        if (record.getReadString().charAt(recordIdxOfVariantStart - 1) == alt.getBaseString().charAt(0)) {
            return ReadType.ALT;
        } else {
            return ReadType.REF;
        }
    }
    if (variant.isSimpleInsertion()) {
        return SAMRecords.containsInsert(record, variant.getStart(), alt.getBaseString()) ? ReadType.ALT : ReadType.REF;
    }
    if (variant.isSimpleDeletion()) {
        return SAMRecords.containsDelete(record, variant.getStart(), ref.getBaseString()) ? ReadType.ALT : ReadType.REF;
    }
    return ReadType.OTHER;
}
 
Example 2
Source File: StrandBiasTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Allocate and fill a 2x2 strand contingency table.  In the end, it'll look something like this:
 *             fw      rc
 *   allele1   #       #
 *   allele2   #       #
 * @return a 2x2 contingency table
 */
public static int[][] getContingencyTable( final AlleleLikelihoods<GATKRead, Allele> likelihoods,
                                           final VariantContext vc,
                                           final int minCount,
                                           final Collection<String> samples) {
    if( likelihoods == null || vc == null) {
        return null;
    }

    final Allele ref = vc.getReference();
    final List<Allele> allAlts = vc.getAlternateAlleles();

    final int[][] table = new int[ARRAY_DIM][ARRAY_DIM];
    for (final String sample : samples) {
        final int[] sampleTable = new int[ARRAY_SIZE];
        likelihoods.bestAllelesBreakingTies(sample).stream()
                .filter(ba -> ba.isInformative())
                .forEach(ba -> updateTable(sampleTable, ba.allele, ba.evidence, ref, allAlts));
        if (passesMinimumThreshold(sampleTable, minCount)) {
            copyToMainTable(sampleTable, table);
        }
    }

    return table;
}
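A sample's counts are merged into the shared table only if they pass passesMinimumThreshold, so samples without enough informative reads are excluded entirely rather than contributing a few noisy counts.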
 
Example 3
Source File: StrandBiasUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Allocate and fill an Nx2 strand contingency table where N is the number of alleles.  In the end, it'll look something like this:
 *             fwd     rev
 *   allele1   #       #
 *   allele2   #       #
 *
 *   NOTE: only informative reads are used
 *
 * @param vc    VariantContext from which to get alleles
 * @param likelihoods per-read allele likelihoods to determine if each read is informative
 * @param perAlleleValues modified to store the output counts
 * @param minCount minimum threshold of counts to use
 */
public static void getStrandCountsFromLikelihoodMap( final VariantContext vc,
                                              final AlleleLikelihoods<GATKRead, Allele> likelihoods,
                                              final ReducibleAnnotationData<List<Integer>> perAlleleValues,
                                              final int minCount) {
    if( likelihoods == null || vc == null ) {
        return;
    }

    final Allele ref = vc.getReference();
    final List<Allele> allAlts = vc.getAlternateAlleles();

    for (final String sample : likelihoods.samples()) {
        final ReducibleAnnotationData<List<Integer>> sampleTable = new AlleleSpecificAnnotationData<>(vc.getAlleles(), null);
        likelihoods.bestAllelesBreakingTies(sample).stream()
                .filter(ba -> ba.isInformative())
                .forEach(ba -> updateTable(ba.allele, ba.evidence, ref, allAlts, sampleTable));
        if (passesMinimumThreshold(sampleTable, minCount)) {
            combineAttributeMap(sampleTable, perAlleleValues);
        }
    }
}
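This is the allele-specific counterpart of the previous example: instead of collapsing counts into ref-versus-alt rows, each allele in vc.getAlleles() accumulates its own forward/reverse counts, which combineAttributeMap merges into perAlleleValues one sample at a time.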
 
Example 4
Source File: LiftoverVcf.java    From picard with MIT License
/**
 * Utility function to attempt to add a variant. Checks that the reference allele still matches the reference sequence (which may have changed during liftover).
 *
 * @param vc new {@link VariantContext}
 * @param refSeq {@link ReferenceSequence} of the new reference
 * @param source the original {@link VariantContext}, used to record the original location information in vc
 */
private void tryToAddVariant(final VariantContext vc, final ReferenceSequence refSeq, final VariantContext source) {
    if (!refSeq.getName().equals(vc.getContig())) {
        throw new IllegalStateException("The contig of the VariantContext, " + vc.getContig() + ", doesn't match the ReferenceSequence: " + refSeq.getName());
    }

    // Check that the reference allele still agrees with the reference sequence
    final boolean mismatchesReference;
    final Allele allele = vc.getReference();
    final byte[] ref = refSeq.getBases();
    final String refString = StringUtil.bytesToString(ref, vc.getStart() - 1, vc.getEnd() - vc.getStart() + 1);

    if (!refString.equalsIgnoreCase(allele.getBaseString())) {
        // consider that the ref and the alt may have been swapped in a simple biallelic SNP
        if (vc.isBiallelic() && vc.isSNP() && refString.equalsIgnoreCase(vc.getAlternateAllele(0).getBaseString())) {
            totalTrackedAsSwapRefAlt++;
            if (RECOVER_SWAPPED_REF_ALT) {
                addAndTrack(LiftoverUtils.swapRefAlt(vc, TAGS_TO_REVERSE, TAGS_TO_DROP), source);
                return;
            }
        }
        mismatchesReference = true;
    } else {
        mismatchesReference = false;
    }

    if (mismatchesReference) {
        rejectedRecords.add(new VariantContextBuilder(source)
                .filter(FILTER_MISMATCHING_REF_ALLELE)
                .attribute(ATTEMPTED_LOCUS, String.format("%s:%d-%d", vc.getContig(), vc.getStart(), vc.getEnd()))
                .attribute(ATTEMPTED_ALLELES, vc.getReference().toString() + "->" + String.join(",", vc.getAlternateAlleles().stream().map(Allele::toString).collect(Collectors.toList())))
                .make());
        failedAlleleCheck++;
        trackLiftedVariantContig(rejectsByContig, source.getContig());
    } else {
        addAndTrack(vc, source);
    }
}
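If the lifted-over reference allele no longer matches the reference sequence, the record is rejected and annotated with the attempted locus and alleles. The one recoverable case is a biallelic SNP whose ref and alt were swapped by the liftover, which is rescued via LiftoverUtils.swapRefAlt when RECOVER_SWAPPED_REF_ALT is set.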
 
Example 5
Source File: LiftoverUtils.java    From picard with MIT License
private static boolean isIndelForLiftover(final VariantContext vc) {
    final Allele ref = vc.getReference();
    if (ref.length() != 1) {
        return true;
    }

    return vc.getAlleles().stream()
            .filter(a -> !a.isSymbolic())
            .filter(a -> !a.equals(Allele.SPAN_DEL))
            .anyMatch(a -> a.length() != 1);
}
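Symbolic alleles and spanning deletions (Allele.SPAN_DEL) are ignored here, so a variant counts as an indel for liftover purposes only when a concrete allele differs in length from the single-base reference.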
 
Example 6
Source File: EventMap.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Create a block substitution out of two variant contexts that start at the same position
 *
 * vc1 can be a SNP, in which case vc2 can be either an insertion or a deletion.
 * If vc1 is an indel, then vc2 must be the opposite type (vc1 deletion => vc2 must be an insertion).
 *
 * @param vc1 the first variant context we want to merge
 * @param vc2 the second
 * @return a block substitution that represents the composite substitution implied by vc1 and vc2
 */
protected VariantContext makeBlock(final VariantContext vc1, final VariantContext vc2) {
    Utils.validateArg( vc1.getStart() == vc2.getStart(), () -> "vc1 and 2 must have the same start but got " + vc1 + " and " + vc2);
    Utils.validateArg( vc1.isBiallelic(), "vc1 must be biallelic");
    if ( ! vc1.isSNP() ) {
        Utils.validateArg ( (vc1.isSimpleDeletion() && vc2.isSimpleInsertion()) || (vc1.isSimpleInsertion() && vc2.isSimpleDeletion()),
                () -> "Can only merge single insertion with deletion (or vice versa) but got " + vc1 + " merging with " + vc2);
    } else {
        Utils.validateArg(!vc2.isSNP(), () -> "vc1 is " + vc1 + " but vc2 is a SNP, which implies there's been some terrible bug in the cigar " + vc2);
    }

    final Allele ref, alt;
    final VariantContextBuilder b = new VariantContextBuilder(vc1);
    if ( vc1.isSNP() ) {
        // we have to repair the first base, so SNP case is special cased
        if ( vc1.getReference().equals(vc2.getReference()) ) {
            // we've got an insertion, so we just update the alt to have the prev alt
            ref = vc1.getReference();
            alt = Allele.create(vc1.getAlternateAllele(0).getDisplayString() + vc2.getAlternateAllele(0).getDisplayString().substring(1), false);
        } else {
            // we're dealing with a deletion, so we patch the ref
            ref = vc2.getReference();
            alt = vc1.getAlternateAllele(0);
            b.stop(vc2.getEnd());
        }
    } else {
        final VariantContext insertion = vc1.isSimpleInsertion() ? vc1 : vc2;
        final VariantContext deletion  = vc1.isSimpleInsertion() ? vc2 : vc1;
        ref = deletion.getReference();
        alt = insertion.getAlternateAllele(0);
        b.stop(deletion.getEnd());
    }

    return b.alleles(Arrays.asList(ref, alt)).make();
}
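For example, merging a SNP C->T with an insertion C->CAA at the same start position yields the block substitution C->TAA, while merging the same SNP with a deletion CAA->C yields CAA->T.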
 
Example 7
Source File: GVCFBlock.java    From gatk with BSD 3-Clause "New" or "Revised" License
public GVCFBlock(final VariantContext startingVC, final int lowerGQBound, final int upperGQBound) {
    Utils.nonNull(startingVC, "startingVC cannot be null");
    this.startingVC = startingVC;
    this.minGQ = lowerGQBound;
    this.maxGQ = upperGQBound;
    this.ref = startingVC.getReference();
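    // begin with an empty block: the end precedes the start until sites are added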
    this.end = getStart() - 1;
}
 
Example 8
Source File: VariantAnnotator.java    From gatk with BSD 3-Clause "New" or "Revised" License
private AlleleLikelihoods<GATKRead, Allele> makeLikelihoods(final VariantContext vc, final ReadsContext readsContext) {
    final List<GATKRead> reads = Utils.stream(readsContext).collect(Collectors.toList());
    final AlleleLikelihoods<GATKRead, Allele> result = new AlleleLikelihoods<>(variantSamples,
            new IndexedAlleleList<>(vc.getAlleles()), AssemblyBasedCallerUtils.splitReadsBySample(variantSamples, getHeaderForReads(), reads));

    final ReadPileup readPileup = new ReadPileup(vc, readsContext);
    final Map<String, ReadPileup> pileupsBySample = readPileup.splitBySample(getHeaderForReads(), "__UNKNOWN__");

    final Allele refAllele = vc.getReference();
    final List<Allele> altAlleles = vc.getAlternateAlleles();
    final int numAlleles = vc.getNAlleles();

    // manually fill each sample's likelihoods one read at a time, assigning probability 1 (0 in log space) to the matching allele, if present
    for (final Map.Entry<String, ReadPileup> samplePileups : pileupsBySample.entrySet()) {
        final LikelihoodMatrix<GATKRead, Allele> sampleMatrix = result.sampleMatrix(result.indexOfSample(samplePileups.getKey()));

        final MutableInt readIndex = new MutableInt(0);
        for (final PileupElement pe : samplePileups.getValue()) {
            // initialize all alleles as equally unlikely
            IntStream.range(0, numAlleles).forEach(a -> sampleMatrix.set(a, readIndex.intValue(), Double.NEGATIVE_INFINITY));

            // if present set the matching allele as likely
            final Allele pileupAllele = GATKVariantContextUtils.chooseAlleleForRead(pe, refAllele, altAlleles, minBaseQualityScore);
            if (pileupAllele != null) {
                sampleMatrix.set(sampleMatrix.indexOfAllele(pileupAllele), readIndex.intValue(), 0);
            }

            readIndex.increment();
        }
    }

    return result;
}
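Setting the matching allele to 0 in log space and every other allele to Double.NEGATIVE_INFINITY turns each pileup read into a hard vote for at most one allele, as chosen by chooseAlleleForRead; reads with no matching allele remain uninformative.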
 
Example 9
Source File: IndelLengthHistogram.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
public void update1(final VariantContext eval, final ReferenceContext referenceContext, final ReadsContext readsContext, final FeatureContext featureContext) {
    if ( eval.isIndel() && ! eval.isComplexIndel() ) {
        if ( ! ( getWalker().ignoreAC0Sites() && eval.isMonomorphicInSamples() )) {
            // only if we are actually polymorphic in the subsetted samples should we count the allele
            for ( Allele alt : eval.getAlternateAlleles() ) {
                final int alleleSize = alt.length() - eval.getReference().length();
                if ( alleleSize == 0 ) throw new GATKException("Allele size not expected to be zero for indel: alt = " + alt + " ref = " + eval.getReference());
                updateLengthHistogram(eval.getReference(), alt);
            }
        }
    }
}
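The signed length difference alt.length() - eval.getReference().length() is positive for insertions and negative for deletions; it can never be zero for a simple indel, which is why a zero difference is reported as an error.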
 
Example 10
Source File: ArHetvarFilter.java    From gatk with BSD 3-Clause "New" or "Revised" License
private boolean variantContextsMatch(VariantContext v1, VariantContext v2) {
    return v1.getContig().equals(v2.getContig())
            && v1.getStart() == v2.getStart()
            && v1.getEnd() == v2.getEnd()
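            // note: '==' compares Allele object identity; htsjdk caches single-base alleles, but equals() would be more robust for multi-base references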
            && v1.getReference() == v2.getReference()
            && v1.getAlternateAlleles().size() == v2.getAlternateAlleles().size()
            && v1.getAlternateAlleles().containsAll(v2.getAlternateAlleles());
}
 
Example 11
Source File: ValidateBasicSomaticShortMutations.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
public void apply(VariantContext discoveryVariantContext, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) {

    final Genotype genotype = discoveryVariantContext.getGenotype(discoverySampleInVcf);

    // Skip any symbolic reference alleles
    final Allele referenceAllele = discoveryVariantContext.getReference();
    if (referenceAllele.isSymbolic()) {
        logger.warn("Skipping variant with symbolic reference allele: " + discoveryVariantContext);
        return;
    }

    // If we cannot validate this genotype, we should simply skip it.
    final boolean isAbleToValidate = BasicSomaticShortMutationValidator.isAbleToValidateGenotype(genotype, referenceAllele);
    if (!isAbleToValidate) {
        if (annotatedVcf != null) {
            vcfWriter.add(new VariantContextBuilder(discoveryVariantContext).attribute(JUDGMENT_INFO_FIELD_KEY, Judgment.SKIPPED).make());
        }
        return;
    }

    // We assume that there is only one alternate allele that we are interested in.
    // Multiallelics are not supported.
    final Allele altAllele = genotype.getAllele(1);

    final SAMFileHeader samFileHeader = getHeaderForReads();

    final ReadPileup readPileup = new ReadPileup(discoveryVariantContext, readsContext);
    final Map<String, ReadPileup> pileupsBySample = readPileup.splitBySample(samFileHeader, "__UNKNOWN__");

    // This could happen when read realignment moves reads such that a particular locus has zero reads
    if (pileupsBySample.isEmpty()){
        return;
    }

    final ReadPileup validationNormalPileup = pileupsBySample.get(validationControlName);
    // TODO: handle this case more carefully
    if (validationNormalPileup == null){
        return;
    }

    final ReadPileup validationTumorPileup = pileupsBySample.get(validationCaseName);

    final Map<Allele, MutableInt> validationTumorAllelicCounts = new AllelePileupCounter(referenceAllele, discoveryVariantContext.getAlternateAlleles(), minBqCutoff, validationTumorPileup)
            .getCountMap();

    final int validationTumorAltCount = validationTumorAllelicCounts.getOrDefault(altAllele, new MutableInt(0)).intValue();
    final int validationTumorRefCount = validationTumorAllelicCounts.get(referenceAllele).intValue();

    final String filterString = discoveryVariantContext.getFilters().stream().sorted().collect(Collectors.joining(";"));
    final BasicValidationResult basicValidationResult = BasicSomaticShortMutationValidator.calculateBasicValidationResult(
            genotype, referenceAllele, validationNormalPileup, validationTumorAltCount,
            validationTumorRefCount + validationTumorAltCount, minBqCutoff,
            new SimpleInterval(discoveryVariantContext.getContig(), discoveryVariantContext.getStart(), discoveryVariantContext.getEnd()),
            filterString);

    if (basicValidationResult != null) {
        results.add(basicValidationResult);
    }

    // guard against a null validation result before dereferencing it (the checks below already assume it may be null)
    final boolean normalArtifact = basicValidationResult != null && basicValidationResult.getNumAltSupportingReadsInNormal() > maxValidationNormalCount;
    final boolean validated = !normalArtifact && basicValidationResult != null && basicValidationResult.isOutOfNoiseFloor();
    final boolean powered = normalArtifact || (basicValidationResult != null && basicValidationResult.getPower() > minPower);

    if (discoveryVariantContext.isSNP()) {
        if (validated) {
            snpTruePositiveCount.increment();
        } else if (powered) {
            snpFalsePositiveCount.increment();
        }
    } else {
        if (validated) {
            indelTruePositiveCount.increment();
        } else if (powered) {
            indelFalsePositiveCount.increment();
        }
    }

    if (annotatedVcf != null) {
        vcfWriter.add(new VariantContextBuilder(discoveryVariantContext)
                .attribute(JUDGMENT_INFO_FIELD_KEY, validated ? Judgment.VALIDATED : Judgment.UNVALIDATED)
                .attribute(POWER_INFO_FIELD_KEY, basicValidationResult == null ? 0 : basicValidationResult.getPower())
                .attribute(VALIDATION_AD_INFO_FIELD_KEY, new int[] {validationTumorRefCount, validationTumorAltCount}).make());
    }
}
 
Example 12
Source File: ValidateVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext ref, final FeatureContext featureContext) {
    if (DO_NOT_VALIDATE_FILTERED && vc.isFiltered()) {
        return;
    }
    // get the true reference allele
    final Allele reportedRefAllele = vc.getReference();
    final int refLength = reportedRefAllele.length();

    final Allele observedRefAllele = hasReference() ? Allele.create(Arrays.copyOf(ref.getBases(), refLength)) : null;

    final Set<String> rsIDs = getRSIDs(featureContext);

    if (VALIDATE_GVCF) {
        final SimpleInterval refInterval = ref.getInterval();

        validateVariantsOrder(vc);

        // GenomeLocSortedSet will automatically merge intervals that are overlapping when setting `mergeIfIntervalOverlaps`
        // to true.  In a GVCF most blocks are adjacent to each other so they wouldn't normally get merged.  We check
        // if the current record is adjacent to the previous record and "overlap" them if they are so our set is as
        // small as possible while still containing the same bases.
        final int start = (previousInterval != null && previousInterval.overlapsWithMargin(refInterval, 1)) ?
                previousInterval.getStart() : refInterval.getStart();
        final int end = (previousInterval != null && previousInterval.overlapsWithMargin(refInterval, 1)) ?
                Math.max(previousInterval.getEnd(), vc.getEnd()) : vc.getEnd();
        final GenomeLoc possiblyMergedGenomeLoc = genomeLocSortedSet.getGenomeLocParser().createGenomeLoc(refInterval.getContig(), start, end);
        genomeLocSortedSet.add(possiblyMergedGenomeLoc, true);

        previousInterval = new SimpleInterval(possiblyMergedGenomeLoc);
        previousStart = vc.getStart();
        validateGVCFVariant(vc);
    }

    for (final ValidationType t : validationTypes) {
        try{
            applyValidationType(vc, reportedRefAllele, observedRefAllele, rsIDs, t);
        } catch (TribbleException e) {
            throwOrWarn(new UserException.FailsStrictValidation(drivingVariantFile, t, e.getMessage()));
        }
    }
}
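Only the first refLength bases of the reference context are compared, since the reported reference allele is anchored at the variant's start position; in GVCF mode, adjacent reference blocks are additionally merged so that the set of covered bases stays as small as possible.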