Java Code Examples for htsjdk.variant.variantcontext.Genotype#isCalled()

The following examples show how to use htsjdk.variant.variantcontext.Genotype#isCalled() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MendelianViolation.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Evaluate the genotypes of mom, dad, and child to detect Mendelian violations
 *
 * @param gMom
 * @param gDad
 * @param gChild
 * @return true if the three genotypes represent a Mendelian violation; false otherwise
 */
public static boolean isViolation(final Genotype gMom, final Genotype gDad, final Genotype gChild) {
    if (gChild.isNoCall()){ //cannot possibly be a violation is child is no call
        return false;
    }
    if(gMom.isHomRef() && gDad.isHomRef() && gChild.isHomRef()) {
        return false;
    }

    //1 parent is no "call
    if(!gMom.isCalled()){
        return (gDad.isHomRef() && gChild.isHomVar()) || (gDad.isHomVar() && gChild.isHomRef());
    }
    else if(!gDad.isCalled()){
        return (gMom.isHomRef() && gChild.isHomVar()) || (gMom.isHomVar() && gChild.isHomRef());
    }
    //Both parents have genotype information
    final Allele childRef = gChild.getAlleles().get(0);
    return !(gMom.getAlleles().contains(childRef) && gDad.getAlleles().contains(gChild.getAlleles().get(1)) ||
        gMom.getAlleles().contains(gChild.getAlleles().get(1)) && gDad.getAlleles().contains(childRef));
}
 
Example 2
Source File: DepthPerAlleleBySample.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
public void annotate(final ReferenceContext ref,
                     final VariantContext vc,
                     final Genotype g,
                     final GenotypeBuilder gb,
                     final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(gb, "gb is null");
    Utils.nonNull(vc, "vc is null");

    if ( g == null || !g.isCalled() || likelihoods == null) {
        return;
    }
    final Set<Allele> alleles = new LinkedHashSet<>(vc.getAlleles());

    // make sure that there's a meaningful relationship between the alleles in the likelihoods and our VariantContext
    Utils.validateArg(likelihoods.alleles().containsAll(alleles), () -> "VC alleles " + alleles + " not a  subset of AlleleLikelihoods alleles " + likelihoods.alleles());

    gb.AD(annotateWithLikelihoods(vc, g, alleles, likelihoods));
}
 
Example 3
Source File: StrandBiasBySample.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
public void annotate(final ReferenceContext ref,
                     final VariantContext vc,
                     final Genotype g,
                     final GenotypeBuilder gb,
                     final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(vc);
    Utils.nonNull(g);
    Utils.nonNull(gb);

    if ( likelihoods == null || !g.isCalled() ) {
        logger.warn("Annotation will not be calculated, genotype is not called or alleleLikelihoodMap is null");
        return;
    }

    final int[][] table = FisherStrand.getContingencyTable(likelihoods, vc, 0, Arrays.asList(g.getSampleName()));

    gb.attribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY, getContingencyArray(table));
}
 
Example 4
Source File: SampleList.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
public Map<String, Object> annotate(final ReferenceContext ref,
                                    final VariantContext vc,
                                    final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(vc);
    if ( vc.isMonomorphicInSamples() || !vc.hasGenotypes() ) {
        return Collections.emptyMap();
    }

    final StringBuilder samples = new StringBuilder();
    for ( final Genotype genotype : vc.getGenotypesOrderedByName() ) {
        if ( genotype.isCalled() && !genotype.isHomRef() ){
            if ( samples.length() > 0 ) {
                samples.append(",");
            }
            samples.append(genotype.getSampleName());
        }
    }

    if ( samples.length() == 0 ) {
        return Collections.emptyMap();
    }

    return Collections.singletonMap(getKeyNames().get(0), samples.toString());
}
 
Example 5
Source File: EvaluateCopyNumberTriStateCalls.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private Genotype composeNonTruthOverlappingGenotype(final VariantContext enclosingContext, final Genotype genotype) {
    final GenotypeBuilder builder = new GenotypeBuilder(genotype.getSampleName());
    if (genotype.isCalled()) {
        GATKProtectedVariantContextUtils.setGenotypeQualityFromPLs(builder, genotype);
        final int[] PL = genotype.getPL();
        final int callAlleleIndex = GATKProtectedMathUtils.minIndex(PL);
        final double quality = callQuality(genotype);
        builder.alleles(Collections.singletonList(enclosingContext.getAlleles().get(callAlleleIndex)));
        builder.attribute(VariantEvaluationContext.CALL_QUALITY_KEY, quality);
        final boolean discovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(
                GATKProtectedVariantContextUtils.getAttributeAsString(genotype, XHMMSegmentGenotyper.DISCOVERY_KEY,
                        XHMMSegmentGenotyper.DISCOVERY_FALSE));
        if (callAlleleIndex != 0 && discovered) {
            builder.attribute(VariantEvaluationContext.EVALUATION_CLASS_KEY, EvaluationClass.UNKNOWN_POSITIVE.acronym);
        }
        if (quality < filterArguments.minimumCalledSegmentQuality) {
            builder.filter(EvaluationFilter.LowQuality.acronym);
        } else {
            builder.filter(EvaluationFilter.PASS);
        }
    } else { /* assume it is REF */
        /* TODO this is a hack to make Andrey's CODEX vcf work; and in general, VCFs that only include discovered
         * variants and NO_CALL (".") on other samples. The idea is to force the evaluation tool to take it call
         * as REF on all other samples. Otherwise, the effective allele frequency of the variant will be erroneously
         * high and will be filtered. */
        builder.alleles(Collections.singletonList(CopyNumberTriStateAllele.REF));
        builder.attribute(VariantEvaluationContext.CALL_QUALITY_KEY, 100000);
        builder.filter(EvaluationFilter.PASS);
    }
    return builder.make();
}
 
Example 6
Source File: VcfToAdpc.java    From picard with MIT License 5 votes vote down vote up
private IlluminaGenotype getIlluminaGenotype(final Genotype genotype, final VariantContext context) {
    final IlluminaGenotype illuminaGenotype;
    if (genotype.isCalled()) {
        // Note that we remove the trailing '*' that appears in alleles that are reference.
        final String illuminaAlleleA = StringUtils.stripEnd(getStringAttribute(context, InfiniumVcfFields.ALLELE_A), "*");
        final String illuminaAlleleB = StringUtils.stripEnd(getStringAttribute(context, InfiniumVcfFields.ALLELE_B), "*");
        if (genotype.getAlleles().size() != 2) {
            throw new PicardException("Unexpected number of called alleles in variant context " + context + " found alleles: " + genotype.getAlleles());
        }
        final Allele calledAllele1 = genotype.getAllele(0);
        final Allele calledAllele2 = genotype.getAllele(1);

        if (calledAllele1.basesMatch(illuminaAlleleA)) {
            if (calledAllele2.basesMatch(illuminaAlleleA)) {
                illuminaGenotype = picard.arrays.illumina.IlluminaGenotype.AA;
            } else if (calledAllele2.basesMatch(illuminaAlleleB)) {
                illuminaGenotype = picard.arrays.illumina.IlluminaGenotype.AB;
            } else {
                throw new PicardException("Error matching called alleles to Illumina alleles.  Context: " + context);
            }
        } else if (calledAllele1.basesMatch(illuminaAlleleB)) {
            if (calledAllele2.basesMatch(illuminaAlleleA)) {
                illuminaGenotype = picard.arrays.illumina.IlluminaGenotype.AB;
            } else if (calledAllele2.basesMatch(illuminaAlleleB)) {
                illuminaGenotype = picard.arrays.illumina.IlluminaGenotype.BB;
            } else {
                throw new PicardException("Error matching called alleles to Illumina alleles.  Context: " + context);
            }
        } else {
            // We didn't match up the illumina alleles to called alleles
            throw new PicardException("Error matching called alleles to Illumina alleles.  Context: " + context);
        }
    } else {
        illuminaGenotype = picard.arrays.illumina.IlluminaGenotype.NN;
    }
    return illuminaGenotype;
}
 
Example 7
Source File: MendelianViolationDetector.java    From picard with MIT License 5 votes vote down vote up
/** Tests to ensure that a locus is bi-allelic within the given set of samples. */
private boolean isVariant(final Genotype... gts) {
    for (final Genotype gt : gts) {
        if (gt.isCalled() && !gt.isHomRef()) return true;
    }

    return false;
}
 
Example 8
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private boolean haveSameGenotypes(final Genotype g1, final Genotype g2) {
    if (g1 == null || g2 == null) {
        return false;
    }

    if ((g1.isCalled() && g2.isFiltered()) ||
            (g2.isCalled() && g1.isFiltered()) ||
            (g1.isFiltered() && g2.isFiltered() && XLfiltered)) {
        return false;
    }

    final List<Allele> a1s = g1.getAlleles();
    final List<Allele> a2s = g2.getAlleles();
    return (a1s.containsAll(a2s) && a2s.containsAll(a1s));
}
 
Example 9
Source File: DepthPerSampleHC.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public void annotate( final ReferenceContext ref,
                      final VariantContext vc,
                      final Genotype g,
                      final GenotypeBuilder gb,
                      final AlleleLikelihoods<GATKRead, Allele> likelihoods ) {
    Utils.nonNull(vc);
    Utils.nonNull(g);
    Utils.nonNull(gb);

    if ( likelihoods == null || !g.isCalled() ) {
        logger.warn("Annotation will not be calculated, genotype is not called or alleleLikelihoodMap is null");
        return;
    }

    // check that there are reads
    final String sample = g.getSampleName();
    if (likelihoods.sampleEvidenceCount(likelihoods.indexOfSample(sample)) == 0) {
        gb.DP(0);
        return;
    }

    final Set<Allele> alleles = new LinkedHashSet<>(vc.getAlleles());

    // make sure that there's a meaningful relationship between the alleles in the likelihoods and our VariantContext
    if ( !likelihoods.alleles().containsAll(alleles) ) {
        logger.warn("VC alleles " + alleles + " not a strict subset of AlleleLikelihoods alleles " + likelihoods.alleles());
        return;
    }

    // the depth for the HC is the sum of the informative alleles at this site.  It's not perfect (as we cannot
    // differentiate between reads that align over the event but aren't informative vs. those that aren't even
    // close) but it's a pretty good proxy and it matches with the AD field (i.e., sum(AD) = DP).
    final Map<Allele, List<Allele>> alleleSubset = alleles.stream().collect(Collectors.toMap(a -> a, a -> Arrays.asList(a)));
    final AlleleLikelihoods<GATKRead, Allele> subsettedLikelihoods = likelihoods.marginalize(alleleSubset);
    final int depth = (int) subsettedLikelihoods.bestAllelesBreakingTies(sample).stream().filter(ba -> ba.isInformative()).count();
    gb.DP(depth);
}
 
Example 10
Source File: GenotypeFilterSummary.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public void update1(VariantContext eval, ReferenceContext referenceContext, ReadsContext readsContext, FeatureContext featureContext) {
    Iterator<Genotype> it = eval.getGenotypes().iterator();
    while (it.hasNext()){
        Genotype g = it.next();
        if (g.isCalled() && !g.isFiltered()){
            nCalledNotFiltered++;
        }
        else if (g.isNoCall() || g.isFiltered()){
            nNoCallOrFiltered++;
        }
    }
}
 
Example 11
Source File: GtcToVcf.java    From picard with MIT License 4 votes vote down vote up
private VariantContext makeVariantContext(Build37ExtendedIlluminaManifestRecord record, final InfiniumGTCRecord gtcRecord,
                                          final InfiniumEGTFile egtFile, final ProgressLogger progressLogger) {
    // If the record is not flagged as errant in the manifest we include it in the VCF
    Allele A = record.getAlleleA();
    Allele B = record.getAlleleB();
    Allele ref = record.getRefAllele();

    final String chr = record.getB37Chr();
    final Integer position = record.getB37Pos();
    final Integer endPosition = position + ref.length() - 1;
    Integer egtIndex = egtFile.rsNameToIndex.get(record.getName());
    if (egtIndex == null) {
        throw new PicardException("Found no record in cluster file for manifest entry '" + record.getName() + "'");
    }

    progressLogger.record(chr, position);

    // Create list of unique alleles
    final List<Allele> assayAlleles = new ArrayList<>();
    assayAlleles.add(ref);

    if (A.equals(B)) {
        throw new PicardException("Found same allele (" + A.getDisplayString() + ") for A and B ");
    }

    if (!ref.equals(A, true)) {
        assayAlleles.add(A);
    }

    if (!ref.equals(B, true)) {
        assayAlleles.add(B);
    }

    final String sampleName = FilenameUtils.removeExtension(INPUT.getName());
    final Genotype genotype = getGenotype(sampleName, gtcRecord, record, A, B);

    final VariantContextBuilder builder = new VariantContextBuilder();

    builder.source(record.getName());
    builder.chr(chr);
    builder.start(position);
    builder.stop(endPosition);
    builder.alleles(assayAlleles);
    builder.log10PError(VariantContext.NO_LOG10_PERROR);
    builder.id(record.getName());
    builder.genotypes(genotype);

    VariantContextUtils.calculateChromosomeCounts(builder, false);

    //custom info fields
    builder.attribute(InfiniumVcfFields.ALLELE_A, record.getAlleleA());
    builder.attribute(InfiniumVcfFields.ALLELE_B, record.getAlleleB());
    builder.attribute(InfiniumVcfFields.ILLUMINA_STRAND, record.getIlmnStrand());
    builder.attribute(InfiniumVcfFields.PROBE_A, record.getAlleleAProbeSeq());
    builder.attribute(InfiniumVcfFields.PROBE_B, record.getAlleleBProbeSeq());
    builder.attribute(InfiniumVcfFields.BEADSET_ID, record.getBeadSetId());
    builder.attribute(InfiniumVcfFields.ILLUMINA_CHR, record.getChr());
    builder.attribute(InfiniumVcfFields.ILLUMINA_POS, record.getPosition());
    builder.attribute(InfiniumVcfFields.ILLUMINA_BUILD, record.getGenomeBuild());
    builder.attribute(InfiniumVcfFields.SOURCE, record.getSource().replace(' ', '_'));
    builder.attribute(InfiniumVcfFields.GC_SCORE, formatFloatForVcf(egtFile.totalScore[egtIndex]));

    for (InfiniumVcfFields.GENOTYPE_VALUES gtValue : InfiniumVcfFields.GENOTYPE_VALUES.values()) {
        final int ordinalValue = gtValue.ordinal();
        builder.attribute(InfiniumVcfFields.N[ordinalValue], egtFile.n[egtIndex][ordinalValue]);
        builder.attribute(InfiniumVcfFields.DEV_R[ordinalValue], formatFloatForVcf(egtFile.devR[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.MEAN_R[ordinalValue], formatFloatForVcf(egtFile.meanR[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.DEV_THETA[ordinalValue], formatFloatForVcf(egtFile.devTheta[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.MEAN_THETA[ordinalValue], formatFloatForVcf(egtFile.meanTheta[egtIndex][ordinalValue]));

        final EuclideanValues genotypeEuclideanValues = polarToEuclidean(egtFile.meanR[egtIndex][ordinalValue], egtFile.devR[egtIndex][ordinalValue],
                egtFile.meanTheta[egtIndex][ordinalValue], egtFile.devTheta[egtIndex][ordinalValue]);
        builder.attribute(InfiniumVcfFields.DEV_X[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.devX));
        builder.attribute(InfiniumVcfFields.MEAN_X[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.meanX));
        builder.attribute(InfiniumVcfFields.DEV_Y[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.devY));
        builder.attribute(InfiniumVcfFields.MEAN_Y[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.meanY));
    }


    final String rsid = record.getRsId();
    if (StringUtils.isNotEmpty(rsid)) {
        builder.attribute(InfiniumVcfFields.RS_ID, rsid);
    }
    if (egtFile.totalScore[egtIndex] == 0.0) {
        builder.filter(InfiniumVcfFields.ZEROED_OUT_ASSAY);
        if (genotype.isCalled()) {
            if (DO_NOT_ALLOW_CALLS_ON_ZEROED_OUT_ASSAYS) {
                throw new PicardException("Found a call (genotype: " + genotype + ") on a zeroed out Assay!!");
            } else {
                log.warn("Found a call (genotype: " + genotype + ") on a zeroed out Assay. " +
                        "This could occur if you called genotypes on a different cluster file than used here.");
            }
        }
    }
    if (record.isDupe()) {
        builder.filter(InfiniumVcfFields.DUPE);
    }
    return builder.make();
}
 
Example 12
Source File: ArraysCallingMetricAccumulator.java    From picard with MIT License 4 votes vote down vote up
private void updateDetailMetric(final CollectArraysVariantCallingMetrics.ArraysVariantCallingDetailMetrics metric,
                                final Genotype genotype,
                                final VariantContext vc,
                                final boolean hasSingletonSample) {
    metric.NUM_ASSAYS++;
    if (!vc.isFiltered() || vc.getCommonInfo().getFilters().contains(InfiniumVcfFields.DUPE)) {
        metric.NUM_NON_FILTERED_ASSAYS++;
        if (genotype.isCalled()) {
            metric.NUM_CALLS++;
            String gtA = (String) genotype.getExtendedAttribute(InfiniumVcfFields.GTA, genotype.getGenotypeString());
            if (!gtA.equals(VCFConstants.EMPTY_GENOTYPE)) {
                metric.NUM_AUTOCALL_CALLS++;
            }
        } else {
            metric.NUM_NO_CALLS++;
        }

        if (vc.isSNP()) {
            // Biallelic SNPs
            final boolean isInDbSnp = dbsnp.snps.isDbSnpSite(vc.getContig(), vc.getStart());

            metric.NUM_SNPS++;

            if (isInDbSnp) {
                metric.NUM_IN_DB_SNP++;
            }
        } else if (vc.isIndel()) {
            metric.NUM_INDELS++;
        }

        if (hasSingletonSample) {
            metric.NUM_SINGLETONS++;
        }

        if (genotype.isHet()) {
            metric.numHets++;
        } else if (genotype.isHomVar()) {
            metric.numHomVar++;
        }
    } else {
        metric.NUM_FILTERED_ASSAYS++;
        if (vc.getCommonInfo().getFilters().contains(InfiniumVcfFields.ZEROED_OUT_ASSAY)) {
            // A "zeroed-out SNP".  Marked as unusable/uncallable
            metric.NUM_ZEROED_OUT_ASSAYS++;
        }
    }
}
 
Example 13
Source File: MendelianViolation.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
protected void updateViolations(final String familyId, final String motherId, final String fatherId, final String childId, final VariantContext vc){
    final Genotype gMom = vc.getGenotype(motherId);
    final Genotype gDad = vc.getGenotype(fatherId);
    final Genotype gChild = vc.getGenotype(childId);

    if (gMom == null || gDad == null || gChild == null){
        if(abortOnSampleNotFound) {
            throw new IllegalArgumentException(String.format("Variant %s:%d: Missing genotypes for family %s: mom=%s dad=%s family=%s", vc.getContig(), vc.getStart(), familyId, motherId, fatherId, childId));
        }
        else {
            return;
        }
    }

    //Count No calls
    if(allCalledOnly && (!gMom.isCalled() || !gDad.isCalled() || !gChild.isCalled())){
        noCall++;
    }
    else if (!gMom.isCalled() && !gDad.isCalled() || !gChild.isCalled()){
        noCall++;
    }
    //Count lowQual. Note that if min quality is set to 0, even values with no quality associated are returned
    else if (minGenotypeQuality > 0 && (
        gMom.getGQ()   < minGenotypeQuality ||
        gDad.getGQ()   < minGenotypeQuality ||
        gChild.getGQ() < minGenotypeQuality )) {
        //no call
        lowQual++;
    }
    else {
        //Count all families per loci called
        familyCalled++;

        if (!(gMom.isHomRef() && gDad.isHomRef() && gChild.isHomRef())) {
            varFamilyCalled++;
        }

        if(isViolation(gMom, gDad, gChild)){
            violationFamilies.add(familyId);
            violations_total++;
        }
        final int count = inheritance.get(gMom.getType()).get(gDad.getType()).get(gChild.getType());
        inheritance.get(gMom.getType()).get(gDad.getType()).put(gChild.getType(),count+1);
    }
}
 
Example 14
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private boolean sampleHasVariant(final Genotype g) {
    return (g !=null && !g.isHomRef() && (g.isCalled() || (g.isFiltered() && !XLfiltered)));
}