Java Code Examples for htsjdk.variant.variantcontext.Genotype#getPloidy()

The following examples show how to use htsjdk.variant.variantcontext.Genotype#getPloidy() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Initialize cache of allele anyploid indices
 *
 * Initialize the cache of PL index to a list of alleles for each ploidy.
 *
 * @param vc    Variant Context
 */
private void initalizeAlleleAnyploidIndicesCache(final VariantContext vc) {
    if (vc.getType() != VariantContext.Type.NO_VARIATION) { // Bypass if not a variant
        for (final Genotype g : vc.getGenotypes()) {
            // Make a new entry if the we have not yet cached a PL to allele indices map for this ploidy and allele count
            // skip if there are no PLs -- this avoids hanging on high-allelic somatic samples, for example, where
            // there's no need for the PL indices since they don't exist
            if (g.getPloidy() != 0 && (!ploidyToNumberOfAlleles.containsKey(g.getPloidy()) || ploidyToNumberOfAlleles.get(g.getPloidy()) < vc.getNAlleles())) {
                if (vc.getGenotypes().stream().anyMatch(Genotype::hasLikelihoods)) {
                    GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(vc.getNAlleles() - 1, g.getPloidy());
                    ploidyToNumberOfAlleles.put(g.getPloidy(), vc.getNAlleles());
                }
            }
        }
    }

}
 
Example 2
Source File: GVCFBlockCombiner.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
boolean genotypeCanBeMergedInCurrentBlock(final Genotype g) {
    final HomRefBlock currentHomRefBlock = (HomRefBlock)currentBlock;
    return currentHomRefBlock != null
            && currentHomRefBlock.withinBounds(Math.min(g.getGQ(), MAX_GENOTYPE_QUAL))
            && currentHomRefBlock.getPloidy() == g.getPloidy()
            && (currentHomRefBlock.getMinPLs() == null || !g.hasPL() || (currentHomRefBlock.getMinPLs().length == g.getPL().length));
}
 
Example 3
Source File: VariantAFEvaluator.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public void update1(VariantContext vc, final ReferenceContext referenceContext, final ReadsContext readsContext, final FeatureContext featureContext) {
    vc.getStart();

    if (vc == null || !vc.isSNP() || (getWalker().ignoreAC0Sites() && vc.isMonomorphicInSamples())) {
        return;
    }

    for (final Genotype genotype : vc.getGenotypes()) {
         // eval array
        if (!genotype.isNoCall()) {
            if (genotype.getPloidy() != PLOIDY) {

                throw new UserException.BadInput("This tool only works with ploidy 2");
            }
            // add AF at this site
            this.totalCalledSites += 1;
            int numReferenceAlleles= genotype.countAllele(vc.getReference());
            double varAFHere = (PLOIDY - numReferenceAlleles)/PLOIDY;
            this.sumVariantAFs += varAFHere;

            totalHetSites += numReferenceAlleles == 1 ? 1 : 0;
            totalHomVarSites += numReferenceAlleles == 0 ? 1 : 0;
            totalHomRefSites += numReferenceAlleles == 2 ? 1 : 0;

        }
    }

    if (!vc.hasGenotypes()) {
        // comp  ( sites only thousand genomes )
        this.totalCalledSites += 1;
        this.sumVariantAFs += vc.getAttributeAsDouble("AF", 0.0);
    }
}
 
Example 4
Source File: StatisticsTask.java    From imputationserver with GNU Affero General Public License v3.0 4 votes vote down vote up
public void checkPloidy(List<String> samples, VariantContext snp, boolean isPhased, LineWriter chrXInfoWriter,
		HashSet<String> hapSamples) throws IOException {

	for (final String name : samples) {

		Genotype genotype = snp.getGenotype(name);

		if (hapSamples.contains(name) && genotype.getPloidy() != 1) {
			chrXInfoWriter.write(name + "\t" + snp.getContig() + ":" + snp.getStart());
			this.chrXPloidyError = true;

		}

		if (genotype.getPloidy() == 1) {
			hapSamples.add(name);
		}

	}
}
 
Example 5
Source File: VcfToVariant.java    From genomewarp with Apache License 2.0 4 votes vote down vote up
@VisibleForTesting
static List<VariantCall> getCalls(VariantContext vc, VCFHeader header) {
  List<VariantCall> toReturn = new ArrayList<>();

  for (String currSample : header.getGenotypeSamples()) {
    if (!vc.hasGenotype(currSample)) {
      continue;
    }

    Genotype currGenotype = vc.getGenotype(currSample);
    VariantCall.Builder vcBuilder = VariantCall.newBuilder();
    vcBuilder.setCallSetName(currSample);

    // Get GT info.
    final Map<Allele, Integer> alleleStrings = buildAlleleMap(vc);
    vcBuilder.addGenotype(alleleStrings.get(currGenotype.getAllele(0)));
    for (int i = 1; i < currGenotype.getPloidy(); i++) {
      vcBuilder.addGenotype(alleleStrings.get(currGenotype.getAllele(i)));
    }

    // Set phasing (not applicable to haploid).
    if (currGenotype.isPhased() && currGenotype.getPloidy() > 1) {
      vcBuilder.setPhaseset("*");
    }

    // Get rest of the genotype info.
    Map<String, ListValue> genotypeInfo = new HashMap<>();

    // Set filters
    if (currGenotype.isFiltered()) {
      genotypeInfo.put(VCFConstants.GENOTYPE_FILTER_KEY, ListValue.newBuilder()
          .addValues(Value.newBuilder().setStringValue(currGenotype.getFilters()).build())
          .build());
    }

    for (final String field : vc.calcVCFGenotypeKeys(header)) {
      // We've already handled genotype
      if (field.equals(VCFConstants.GENOTYPE_KEY)) {
        continue;
      }

      ListValue.Builder listValueBuilder = ListValue.newBuilder();
      if (field.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
        // This field has already been dealt with
        continue;
      } else {
        final IntGenotypeFieldAccessors.Accessor accessor =
            GENOTYPE_FIELD_ACCESSORS.getAccessor(field);

        if (accessor != null) {
          // The field is a default inline field.
          if (!parseInlineGenotypeFields(field, vcBuilder, listValueBuilder, accessor,
              currGenotype)) {
            continue;
          }
        } else {
          // Other field, we'll get type/other info from header.
          if (!parseOtherGenotypeFields(field, vc, listValueBuilder, currGenotype,
              header)) {
            continue;
          }
        }
      }

      genotypeInfo.put(field, listValueBuilder.build());
    }

    vcBuilder.putAllInfo(genotypeInfo);
    toReturn.add(vcBuilder.build());
  }

  return toReturn;
}
 
Example 6
Source File: HomRefBlock.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Add a homRef block to the current block
 *
 * @param pos current genomic position
 * @param newEnd new calculated block end position
 * @param genotype A non-null Genotype with GQ and DP attributes
 */
@Override
public void add(final int pos, final int newEnd, final Genotype genotype) {
    Utils.nonNull(genotype, "genotype cannot be null");
    if ( ! genotype.hasPL() ) { throw new IllegalArgumentException("genotype must have PL field");}
    if ( pos != end + 1 ) { throw new IllegalArgumentException("adding genotype at pos " + pos + " isn't contiguous with previous end " + end); }
    if ( genotype.getPloidy() != ploidy) { throw new IllegalArgumentException("cannot add a genotype with a different ploidy: " + genotype.getPloidy() + " != " + ploidy); }
    // Make sure the GQ is within the bounds of this band. Treat GQs > 99 as 99.
    if ( !withinBounds(Math.min(genotype.getGQ(), VCFConstants.MAX_GENOTYPE_QUAL))) {
        throw new IllegalArgumentException("cannot add a genotype with GQ=" + genotype.getGQ() + " because it's not within bounds ["
                + this.getGQLowerBound() + ',' + this.getGQUpperBound() + ')');
    }

    if( minPLs == null ) {
        minPLs = genotype.getPL();
    }
    else { // otherwise take the min with the provided genotype's PLs
        final int[] pls = genotype.getPL();
        if (pls.length != minPLs.length) {
            throw new GATKException("trying to merge different PL array sizes: " + pls.length + " != " + minPLs.length);
        }
        for (int i = 0; i < pls.length; i++) {
            minPLs[i] = Math.min(minPLs[i], pls[i]);
        }
    }

    if( genotype.hasExtendedAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY)) {
        if (minPPs == null ) {
            minPPs = PosteriorProbabilitiesUtils.parsePosteriorsIntoPhredSpace(genotype);
        }
        else { // otherwise take the min with the provided genotype's PLs
            final int[] pps = PosteriorProbabilitiesUtils.parsePosteriorsIntoPhredSpace(genotype);
            if (pps.length != minPPs.length) {
                throw new GATKException("trying to merge different PP array sizes: " + pps.length + " != " + minPPs.length);
            }
            for (int i = 0; i < pps.length; i++) {
                minPPs[i] = Math.min(minPPs[i], pps[i]);
            }
        }
    }

    end = newEnd;
    DPs.add(Math.max(genotype.getDP(), 0)); // DP must be >= 0
}
 
Example 7
Source File: AlleleFrequencyCalculator.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Compute the probability of the alleles segregating given the genotype likelihoods of the samples in vc
 *
 * @param vc the VariantContext holding the alleles and sample information.  The VariantContext
 *           must have at least 1 alternative allele
 * @return result (for programming convenience)
 */
public AFCalculationResult calculate(final VariantContext vc, final int defaultPloidy) {
    Utils.nonNull(vc, "VariantContext cannot be null");
    final int numAlleles = vc.getNAlleles();
    final List<Allele> alleles = vc.getAlleles();
    Utils.validateArg( numAlleles > 1, () -> "VariantContext has only a single reference allele, but getLog10PNonRef requires at least one at all " + vc);

    final double[] priorPseudocounts = alleles.stream()
            .mapToDouble(a -> a.isReference() ? refPseudocount : (a.length() == vc.getReference().length() ? snpPseudocount : indelPseudocount)).toArray();

    double[] alleleCounts = new double[numAlleles];
    final double flatLog10AlleleFrequency = -MathUtils.log10(numAlleles); // log10(1/numAlleles)
    double[] log10AlleleFrequencies = new IndexRange(0, numAlleles).mapToDouble(n -> flatLog10AlleleFrequency);

    for (double alleleCountsMaximumDifference = Double.POSITIVE_INFINITY; alleleCountsMaximumDifference > THRESHOLD_FOR_ALLELE_COUNT_CONVERGENCE; ) {
        final double[] newAlleleCounts = effectiveAlleleCounts(vc, log10AlleleFrequencies);
        alleleCountsMaximumDifference = Arrays.stream(MathArrays.ebeSubtract(alleleCounts, newAlleleCounts)).map(Math::abs).max().getAsDouble();
        alleleCounts = newAlleleCounts;
        final double[] posteriorPseudocounts = MathArrays.ebeAdd(priorPseudocounts, alleleCounts);

        // first iteration uses flat prior in order to avoid local minimum where the prior + no pseudocounts gives such a low
        // effective allele frequency that it overwhelms the genotype likelihood of a real variant
        // basically, we want a chance to get non-zero pseudocounts before using a prior that's biased against a variant
        log10AlleleFrequencies = new Dirichlet(posteriorPseudocounts).log10MeanWeights();
    }

    double[] log10POfZeroCountsByAllele = new double[numAlleles];
    double log10PNoVariant = 0;

    final boolean spanningDeletionPresent = alleles.contains(Allele.SPAN_DEL);
    final Map<Integer, int[]> nonVariantIndicesByPloidy = new Int2ObjectArrayMap<>();

    // re-usable buffers of the log10 genotype posteriors of genotypes missing each allele
    final List<DoubleArrayList> log10AbsentPosteriors = IntStream.range(0,numAlleles).mapToObj(n -> new DoubleArrayList()).collect(Collectors.toList());
    for (final Genotype g : vc.getGenotypes()) {
        if (!g.hasLikelihoods()) {
            continue;
        }
        final int ploidy = g.getPloidy() == 0 ? defaultPloidy : g.getPloidy();
        final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, numAlleles);

        final double[] log10GenotypePosteriors = log10NormalizedGenotypePosteriors(g, glCalc, log10AlleleFrequencies);

        //the total probability
        if (!spanningDeletionPresent) {
            log10PNoVariant += log10GenotypePosteriors[HOM_REF_GENOTYPE_INDEX];
        } else {
            nonVariantIndicesByPloidy.computeIfAbsent(ploidy, p -> genotypeIndicesWithOnlyRefAndSpanDel(p, alleles));
            final int[] nonVariantIndices = nonVariantIndicesByPloidy.get(ploidy);
            final double[] nonVariantLog10Posteriors = MathUtils.applyToArray(nonVariantIndices, n -> log10GenotypePosteriors[n]);
            // when the only alt allele is the spanning deletion the probability that the site is non-variant
            // may be so close to 1 that finite precision error in log10SumLog10 yields a positive value,
            // which is bogus.  Thus we cap it at 0.
            log10PNoVariant += Math.min(0,MathUtils.log10SumLog10(nonVariantLog10Posteriors));
        }

        // if the VC is biallelic the allele-specific qual equals the variant qual
        if (numAlleles == 2) {
            continue;
        }

        // for each allele, we collect the log10 probabilities of genotypes in which the allele is absent, then add (in log space)
        // to get the log10 probability that the allele is absent in this sample
        log10AbsentPosteriors.forEach(DoubleArrayList::clear);  // clear the buffers.  Note that this is O(1) due to the primitive backing array
        for (int genotype = 0; genotype < glCalc.genotypeCount(); genotype++) {
            final double log10GenotypePosterior = log10GenotypePosteriors[genotype];
            glCalc.genotypeAlleleCountsAt(genotype).forEachAbsentAlleleIndex(a -> log10AbsentPosteriors.get(a).add(log10GenotypePosterior), numAlleles);
        }

        final double[] log10PNoAllele = log10AbsentPosteriors.stream()
                .mapToDouble(buffer -> MathUtils.log10SumLog10(buffer.toDoubleArray()))
                .map(x -> Math.min(0, x)).toArray();    // if prob of non hom ref > 1 due to finite precision, short-circuit to avoid NaN

        // multiply the cumulative probabilities of alleles being absent, which is addition of logs
        MathUtils.addToArrayInPlace(log10POfZeroCountsByAllele, log10PNoAllele);
    }

    // for biallelic the allele-specific qual equals the variant qual, and we short-circuited the calculation above
    if (numAlleles == 2) {
        log10POfZeroCountsByAllele[1] = log10PNoVariant;
    }

    // unfortunately AFCalculationResult expects integers for the MLE.  We really should emit the EM no-integer values
    // which are valuable (eg in CombineGVCFs) as the sufficient statistics of the Dirichlet posterior on allele frequencies
    final int[] integerAlleleCounts = Arrays.stream(alleleCounts).mapToInt(x -> (int) Math.round(x)).toArray();
    final int[] integerAltAlleleCounts = Arrays.copyOfRange(integerAlleleCounts, 1, numAlleles);

    //skip the ref allele (index 0)
    final Map<Allele, Double> log10PRefByAllele = IntStream.range(1, numAlleles).boxed()
            .collect(Collectors.toMap(alleles::get, a -> log10POfZeroCountsByAllele[a]));

    return new AFCalculationResult(integerAltAlleleCounts, alleles, log10PNoVariant, log10PRefByAllele);
}