Java Code Examples for htsjdk.variant.variantcontext.Genotype#getAD()

The following examples show how to use htsjdk.variant.variantcontext.Genotype#getAD(). Each example is taken from an open-source project; the source file, originating project, and license are noted above the code.
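
Before the project examples, here is a minimal, self-contained sketch of the usage pattern they all share: open a VCF, check Genotype#hasAD() before calling getAD(), and read the per-allele depths (by VCF convention AD[0] is the reference depth and the remaining entries are alternate-allele depths). The file path and printed fields are placeholders for illustration; this sketch is not taken from any of the projects below.

import java.io.File;

import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFFileReader;

public class GenotypeADExample {
    public static void main(final String[] args) {
        // "example.vcf" is a placeholder path; requireIndex = false so an unindexed VCF also works
        try (final VCFFileReader reader = new VCFFileReader(new File("example.vcf"), false)) {
            for (final VariantContext vc : reader) {
                for (final Genotype genotype : vc.getGenotypes()) {
                    if (!genotype.hasAD()) {
                        continue; // AD is an optional FORMAT field; always guard getAD() with hasAD()
                    }
                    final int[] ad = genotype.getAD(); // one entry per allele: reference first, then alternates
                    final int refDepth = ad[0];
                    final int altDepth = ad.length > 1 ? ad[1] : 0;
                    System.out.printf("%s:%d %s ref=%d alt=%d%n",
                            vc.getContig(), vc.getStart(), genotype.getSampleName(), refDepth, altDepth);
                }
            }
        }
    }
}
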
Example 1
Source File: BasicSomaticShortMutationValidator.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * @param genotype The genotype to test whether we can attempt to validate.  Never {@code null}
 * @param referenceAllele The reference allele (from parent VariantContext).  Never {@code null}
 * @return whether this class can even attempt a validation of the genotype in question.
 */
public static boolean isAbleToValidateGenotype(final Genotype genotype, final Allele referenceAllele) {
    Utils.nonNull(genotype);
    Utils.nonNull(referenceAllele);

    // In order to proceed, we have some assumptions:
    //  - The genotype has a ploidy of 2
    //  - The genotype is a simple indel or an xNP
    //  - The first allele of the genotype is reference
    final boolean isDiploid = genotype.getAlleles().size() == 2;
    final boolean doesGenotypeHaveReference = genotype.getAllele(0).equals(referenceAllele);
    final boolean isReferenceNotSymbolic = !referenceAllele.isSymbolic();
    final VariantContext.Type variantType = GATKVariantContextUtils.typeOfVariant(genotype.getAllele(0), genotype.getAllele(1));
    final boolean isValidatableVariantType = VALIDATABLE_TYPES.contains(variantType)
                    && !GATKVariantContextUtils.isComplexIndel(genotype.getAllele(0), genotype.getAllele(1));
    final boolean hasKnownCoverage = genotype.hasAD() && (genotype.getAD().length == 2);
    final boolean isValidateable = (isDiploid && doesGenotypeHaveReference && isValidatableVariantType && hasKnownCoverage && isReferenceNotSymbolic);
    if (!isValidateable) {
        logger.info("Cannot validate genotype: " + genotype + "  ploidy2: " + isDiploid +
                "  genotypeHasReferenceAllele: " + doesGenotypeHaveReference + "   validatableVariant: " + isValidatableVariantType +
                "  hasCompleteAD field: " + hasKnownCoverage + "  isNonSymbolicReference: " + isReferenceNotSymbolic);
    }

    return isValidateable;
}
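
A rough sketch (not part of BasicSomaticShortMutationValidator) of a genotype that would satisfy these checks, built with htsjdk's GenotypeBuilder; the sample name and depths are made up for illustration:

// Assumed imports: htsjdk.variant.variantcontext.Allele, Genotype, GenotypeBuilder; java.util.Arrays
final Allele ref = Allele.create("A", true);   // reference allele
final Allele alt = Allele.create("T", false);  // alternate allele
final Genotype genotype = new GenotypeBuilder("TUMOR", Arrays.asList(ref, alt))
        .AD(new int[]{30, 12})                 // ref depth, alt depth -> getAD().length == 2
        .make();
// Diploid, first allele is the reference, simple SNP, non-symbolic reference, complete AD:
// isAbleToValidateGenotype(genotype, ref) should pass every check logged above.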
 
Example 2
Source File: AS_RMSMappingQuality.java    From gatk with BSD 3-Clause "New" or "Revised" License
private Map<Allele, Integer> getADcounts(final VariantContext vc) {
    final GenotypesContext genotypes = vc.getGenotypes();
    if ( genotypes == null || genotypes.size() == 0 ) {
        genotype_logger.warn("VC does not have genotypes -- annotations were calculated in wrong order");
        return null;
    }

    final Map<Allele, Integer> variantADs = new HashMap<>();
    for(final Allele a : vc.getAlleles())
        variantADs.put(a,0);

    for (final Genotype gt : vc.getGenotypes()) {
        if(gt.hasAD()) {
            final int[] ADs = gt.getAD();
            for (int i = 1; i < vc.getNAlleles(); i++) {
                variantADs.put(vc.getAlternateAllele(i - 1), variantADs.get(vc.getAlternateAllele(i - 1)) + ADs[i]); //here -1 is to reconcile allele index with alt allele index
            }
        }
    }
    return variantADs;
}
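
The i - 1 in the loop above reflects the AD layout: index 0 is the reference allele, so alternate allele i corresponds to ADs[i + 1]. A small worked illustration with made-up numbers:

// Hypothetical site: REF=A, ALT=C,T with a genotype whose AD field is 10,3,7
// getAD() returns {10, 3, 7}:
//   ADs[0] = 10 -> depth of the reference allele A
//   ADs[1] =  3 -> depth of vc.getAlternateAllele(0), i.e. C
//   ADs[2] =  7 -> depth of vc.getAlternateAllele(1), i.e. T
// which is why the loop pairs ADs[i] with vc.getAlternateAllele(i - 1).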
 
Example 3
Source File: Mutect2IntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test
@SuppressWarnings("deprecation")
public void testAFAtHighDP()  {
    Utils.resetRandomGenerator();
    final File unfilteredVcf = createTempFile("unfiltered", ".vcf");

    runMutect2(DEEP_MITO_BAM, unfilteredVcf, "chrM:1-1018", MITO_REF.getAbsolutePath(), Optional.empty(),
            args -> args.add(IntervalArgumentCollection.INTERVAL_PADDING_LONG_NAME, 300));

    final List<VariantContext> variants = VariantContextTestUtils.streamVcf(unfilteredVcf).collect(Collectors.toList());

    for (final VariantContext vc : variants) {
        Assert.assertTrue(vc.isBiallelic()); //I do some lazy parsing below that won't hold for multiple alternate alleles
        final Genotype g = vc.getGenotype(DEEP_MITO_SAMPLE_NAME);
        Assert.assertTrue(g.hasAD());
        final int[] ADs = g.getAD();
        Assert.assertTrue(g.hasExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY));
        //Assert.assertEquals(Double.parseDouble(String.valueOf(vc.getGenotype(DEEP_MITO_SAMPLE_NAME).getExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY,"0"))), (double)ADs[1]/(ADs[0]+ADs[1]), 1e-6);
        Assert.assertEquals(Double.parseDouble(String.valueOf(vc.getGenotype(DEEP_MITO_SAMPLE_NAME).getAttributeAsString(GATKVCFConstants.ALLELE_FRACTION_KEY, "0"))), (double) ADs[1] / (ADs[0] + ADs[1]), 2e-3);
    }
}
 
Example 4
Source File: PositiveAllelicDepth.java    From hmftools with GNU General Public License v3.0
private void run() {
    fileWriter.writeHeader(fileReader.getFileHeader());
    for (final VariantContext context : fileReader) {

        final VariantContextBuilder builder = new VariantContextBuilder(context);
        final List<Genotype> genotypes = Lists.newArrayList();

        for (int genotypeIndex = 0; genotypeIndex < context.getGenotypes().size(); genotypeIndex++) {
            Genotype genotype = context.getGenotype(genotypeIndex);

            if (genotype.hasAD() && genotype.hasDP()) {
                int[] ad = genotype.getAD();
                int total = 0;
                boolean updateRecord = false;
                for (int i = 0; i < ad.length; i++) {
                    int adValue = ad[i];
                    if (adValue < 0) {
                        updateRecord = true;
                        ad[i] = Math.abs(adValue);
                    }
                    total += Math.abs(adValue);
                }

                if (updateRecord) {
                    LOGGER.info("Updated entry at {}:{}", context.getContig(), context.getStart());
                    Genotype updated = new GenotypeBuilder(genotype).AD(ad).DP(total).make();
                    genotypes.add(updated);
                }
                else {
                    genotypes.add(genotype);
                }
            }
        }

        builder.genotypes(genotypes);
        fileWriter.add(builder.make());
    }
}
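
For concreteness, a made-up record walking through the repair above:

// Hypothetical genotype before the loop: AD = {-2, 7}, DP = 5
// After the loop: ad = {2, 7}, total = 9, updateRecord = true, so the rewritten genotype is
//   new GenotypeBuilder(genotype).AD(new int[]{2, 7}).DP(9).make()
// Genotypes with no negative AD entries are copied through unchanged.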
 
Example 5
Source File: AllelicDepth.java    From hmftools with GNU General Public License v3.0
@NotNull
static AllelicDepth fromGenotype(@NotNull final Genotype genotype) {
    Preconditions.checkArgument(genotype.hasAD());
    int[] adFields = genotype.getAD();
    final int alleleReadCount = adFields[1];
    int totalReadCount = totalReadCount(genotype);
    return ImmutableAllelicDepthImpl.builder().alleleReadCount(alleleReadCount).totalReadCount(totalReadCount).build();
}
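
The totalReadCount(genotype) helper is not shown on this page. A plausible stand-in (an assumption for illustration, not necessarily the hmftools implementation) simply sums the AD entries:

// Hypothetical helper, for illustration only:
private static int totalReadCount(final Genotype genotype) {
    int total = 0;
    for (final int depth : genotype.getAD()) { // sum read depth over the reference and all alternate alleles
        total += depth;
    }
    return total;
}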
 
Example 6
Source File: QualByDepth.java    From gatk with BSD 3-Clause "New" or "Revised" License
public static int getDepth(final GenotypesContext genotypes, final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    int depth = 0;
    int ADrestrictedDepth = 0;

    for ( final Genotype genotype : genotypes ) {
        // we care only about variant calls with likelihoods
        if ( !genotype.isHet() && !genotype.isHomVar() ) {
            continue;
        }

        // if we have the AD values for this sample, let's make sure that the variant depth is greater than 1!
        if ( genotype.hasAD() ) {
            final int[] AD = genotype.getAD();
            final int totalADdepth = (int) MathUtils.sum(AD);
            if ( totalADdepth != 0 ) {
                if (totalADdepth - AD[0] > 1) {
                    ADrestrictedDepth += totalADdepth;
                }
                depth += totalADdepth;
                continue;
            }
        }
        // if there is no AD value or it is a dummy value, we want to look to other means to get the depth
        if (likelihoods != null) {
            depth += likelihoods.sampleEvidenceCount(likelihoods.indexOfSample(genotype.getSampleName()));
        } else if ( genotype.hasDP() ) {
            depth += genotype.getDP();
        }
    }

    // if the AD-restricted depth is a usable value (i.e. not zero), then we should use that one going forward
    if ( ADrestrictedDepth > 0 ) {
        depth = ADrestrictedDepth;
    }
    return depth;
}
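
A made-up two-sample illustration of the AD-restricted logic:

// sample 1: AD = {20, 6} -> totalADdepth = 26, 6 alt reads (> 1)    -> depth += 26 and ADrestrictedDepth += 26
// sample 2: AD = {25, 1} -> totalADdepth = 26, 1 alt read (not > 1) -> depth += 26 only
// ADrestrictedDepth = 26 > 0, so getDepth(...) returns 26 rather than the unrestricted 52.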
 
Example 7
Source File: AllelicDepth.java    From hmftools with GNU General Public License v3.0
static boolean containsAllelicDepth(final Genotype genotype) {
    return genotype.hasAD() && genotype.getAD().length > 1;
}
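
A natural way to combine this guard with fromGenotype from Example 5 (a sketch; the accessor names follow the Immutables-style builder shown there):

// Only build an AllelicDepth when the genotype carries usable AD values
if (AllelicDepth.containsAllelicDepth(genotype)) {
    final AllelicDepth depth = AllelicDepth.fromGenotype(genotype);
    // ... use depth.alleleReadCount() and depth.totalReadCount() ...
}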
 
Example 8
Source File: LiftoverUtils.java    From picard with MIT License
/**
 * method to swap the reference and alt alleles of a bi-allelic SNP
 *
 * @param vc                   the {@link VariantContext} (bi-allelic SNP) that needs to have its REF and ALT alleles swapped.
 * @param annotationsToReverse INFO field annotations (of double value) that will be reversed (x->1-x)
 * @param annotationsToDrop    INFO field annotations that will be dropped from the result since they are invalid when REF and ALT are swapped
 * @return a new {@link VariantContext} with alleles swapped, INFO fields modified and in the genotypes, GT, AD and PL corrected appropriately
 */
public static VariantContext swapRefAlt(final VariantContext vc, final Collection<String> annotationsToReverse, final Collection<String> annotationsToDrop) {

    if (!vc.isBiallelic() || !vc.isSNP()) {
        throw new IllegalArgumentException("swapRefAlt can only process biallelic SNPs, found " + vc.toString());
    }

    final VariantContextBuilder swappedBuilder = new VariantContextBuilder(vc);

    swappedBuilder.attribute(SWAPPED_ALLELES, true);

    // Use getBaseString() (rather than the Allele itself) in order to create new Alleles with swapped
    // reference and non-variant attributes
    swappedBuilder.alleles(Arrays.asList(vc.getAlleles().get(1).getBaseString(), vc.getAlleles().get(0).getBaseString()));

    final Map<Allele, Allele> alleleMap = new HashMap<>();

    // A mapping from the old allele to the new allele, to be used when fixing the genotypes
    alleleMap.put(vc.getAlleles().get(0), swappedBuilder.getAlleles().get(1));
    alleleMap.put(vc.getAlleles().get(1), swappedBuilder.getAlleles().get(0));

    final GenotypesContext swappedGenotypes = GenotypesContext.create(vc.getGenotypes().size());
    for (final Genotype genotype : vc.getGenotypes()) {
        final List<Allele> swappedAlleles = new ArrayList<>();
        for (final Allele allele : genotype.getAlleles()) {
            if (allele.isNoCall()) {
                swappedAlleles.add(allele);
            } else {
                swappedAlleles.add(alleleMap.get(allele));
            }
        }
        // Flip AD
        final GenotypeBuilder builder = new GenotypeBuilder(genotype).alleles(swappedAlleles);
        if (genotype.hasAD() && genotype.getAD().length == 2) {
            final int[] ad = ArrayUtils.clone(genotype.getAD());
            ArrayUtils.reverse(ad);
            builder.AD(ad);
        } else {
            builder.noAD();
        }

        //Flip PL
        if (genotype.hasPL() && genotype.getPL().length == 3) {
            final int[] pl = ArrayUtils.clone(genotype.getPL());
            ArrayUtils.reverse(pl);
            builder.PL(pl);
        } else {
            builder.noPL();
        }
        swappedGenotypes.add(builder.make());
    }
    swappedBuilder.genotypes(swappedGenotypes);

    for (final String key : vc.getAttributes().keySet()) {
        if (annotationsToDrop.contains(key)) {
            swappedBuilder.rmAttribute(key);
        } else if (annotationsToReverse.contains(key) && !vc.getAttributeAsString(key, "").equals(VCFConstants.MISSING_VALUE_v4)) {
            final double attributeToReverse = vc.getAttributeAsDouble(key, -1);

            if (attributeToReverse < 0 || attributeToReverse > 1) {
                log.warn("Trying to reverse attribute " + key +
                        " but found value that isn't between 0 and 1: (" + attributeToReverse + ") in variant " + vc + ". Results might be wrong.");
            }
            swappedBuilder.attribute(key, 1 - attributeToReverse);
        }
    }

    return swappedBuilder.make();
}
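
A quick worked illustration of the two flips, with made-up numbers (for a biallelic diploid genotype the PL ordering is hom-ref, het, hom-var per the VCF spec):

// Before the swap:  AD = {18, 4}       PL = {0, 60, 500}
// After reverse:    AD = {4, 18}       PL = {500, 60, 0}
// The old ALT depth becomes the new REF depth, hom-ref and hom-var likelihoods trade places,
// and the het likelihood stays in the middle. Genotypes whose AD or PL length does not match
// the biallelic layout have those fields cleared via noAD() / noPL().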
 
Example 9
Source File: BasicSomaticShortMutationValidator.java    From gatk with BSD 3-Clause "New" or "Revised" License
/** Perform basic somatic pileup validation and return a result instance.
 *
 * NOTE: This only looks at the first alternate allele.  Multiallelics are not supported.
 *
 * @param genotype given genotype to validate.  Never {@code null}
 * @param referenceAllele Never {@code null}
 * @param validationNormalPileup Pileup for the corresponding location of the genotype in the validation normal.  Never {@code null}
 * @param validationTumorAltCount Alt read count for the validation tumor sample.  Must be positive or zero.
 * @param validationTumorTotalCount Total read count for the validation tumor sample.  Must be positive or zero.
 * @param minBaseQualityCutoff Minimum base quality threshold to count any read.  Must be positive or zero.
 * @param interval location represented in the genotype.  Never {@code null}
 * @param filters additional filters besides the ones on the genotype.  Typically,
 *                the parent variant context filters are provided here.  Never {@code null}
 * @return validation result.  {@code null} if the genotype cannot be validated
 */
public static BasicValidationResult calculateBasicValidationResult(final Genotype genotype, final Allele referenceAllele,
                                                                   final ReadPileup validationNormalPileup,
                                                                   int validationTumorAltCount, int validationTumorTotalCount,
                                                                   int minBaseQualityCutoff, final SimpleInterval interval, final String filters) {

    if (!isAbleToValidateGenotype(genotype, referenceAllele) || validationNormalPileup == null){
        return null;
    }

    Utils.nonNull(referenceAllele);
    Utils.nonNull(genotype);
    Utils.nonNull(interval);
    Utils.nonNull(filters);
    ParamUtils.isPositiveOrZero(validationTumorAltCount, "Validation alt count must be >= 0");
    ParamUtils.isPositiveOrZero(validationTumorTotalCount, "Validation total count must be >= 0");
    ParamUtils.isPositiveOrZero(minBaseQualityCutoff, "Minimum base quality cutoff must be >= 0");

    final double maxAltRatioSeenInNormalValidation = PowerCalculationUtils.calculateMaxAltRatio(validationNormalPileup,
            referenceAllele, minBaseQualityCutoff);
    final int discoveryTumorAltCount = genotype.getAD()[1];
    final int discoveryTumorTotalCount = genotype.getAD()[0] + discoveryTumorAltCount;

    if (Double.isNaN(maxAltRatioSeenInNormalValidation) || discoveryTumorTotalCount == 0) {
        return null;
    }

    final int minCountForSignal = PowerCalculationUtils.calculateMinCountForSignal(validationTumorTotalCount, maxAltRatioSeenInNormalValidation);
    final double power = PowerCalculationUtils.calculatePower(validationTumorTotalCount, discoveryTumorAltCount, discoveryTumorTotalCount, minCountForSignal);

    boolean isNotNoise = (validationTumorAltCount >= minCountForSignal);
    boolean isEnoughValidationCoverageToValidate = validationTumorAltCount >= 2;

    final String genotypeFilters = genotype.getFilters() == null ? "" : genotype.getFilters();
    final long numReadsSupportingAltInValidationNormal = PowerCalculationUtils.calculateNumReadsSupportingAllele(validationNormalPileup,
            genotype.getAllele(0), genotype.getAllele(1), minBaseQualityCutoff);
    return new BasicValidationResult(interval, minCountForSignal, isEnoughValidationCoverageToValidate,
            isNotNoise, power, validationTumorAltCount, validationTumorTotalCount-validationTumorAltCount,
            discoveryTumorAltCount, discoveryTumorTotalCount-discoveryTumorAltCount, referenceAllele,
            genotype.getAllele(1), filters + genotypeFilters, numReadsSupportingAltInValidationNormal);
}
 
Example 10
Source File: ContaminationFilter.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
public List<Double> calculateErrorProbabilityForAlleles(final VariantContext vc, final Mutect2FilteringEngine filteringEngine, ReferenceContext referenceContext) {
    // for every alt allele, a list of the depth and posterior pairs of each sample
    final List<List<ImmutablePair<Integer, Double>>> depthsAndPosteriorsPerAllele = new ArrayList<>();
    new IndexRange(0, vc.getNAlleles()-1).forEach(i -> depthsAndPosteriorsPerAllele.add(new ArrayList<>()));

    for (final Genotype tumorGenotype : vc.getGenotypes()) {
        if (filteringEngine.isNormal(tumorGenotype)) {
            continue;
        }

        final double contaminationFromFile = contaminationBySample.getOrDefault(tumorGenotype.getSampleName(), defaultContamination);
        final double contamination = Math.max(0, Math.min(contaminationFromFile, 1 - EPSILON)); // handle file with contamination == 1
        final int[] ADs = tumorGenotype.getAD();
        final int totalAD = (int) MathUtils.sum(ADs);
        final int[] altADs = Arrays.copyOfRange(ADs, 1, ADs.length); // get ADs of alts only
        // POPAF has only alt allele data
        final double[] negativeLog10AlleleFrequencies = VariantContextGetters.getAttributeAsDoubleArray(vc,
                GATKVCFConstants.POPULATION_AF_KEY, () -> new double[]{Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY}, Double.POSITIVE_INFINITY);
        final double[] alleleFrequencies = MathUtils.applyToArray(negativeLog10AlleleFrequencies, x -> Math.pow(10,-x));

        final SomaticClusteringModel model = filteringEngine.getSomaticClusteringModel();
        final double[] logSomaticLikelihoodPerAllele = Arrays.stream(altADs).mapToDouble(altCount -> model.logLikelihoodGivenSomatic(totalAD, altCount)).toArray();

        final double[] singleContaminantLikelihoodPerAllele = new double[alleleFrequencies.length];
        final double[] manyContaminantLikelihoodPerAllele = new double[alleleFrequencies.length];
        final double[] logContaminantLikelihoodPerAllele = new double[alleleFrequencies.length];
        final double[] logOddsOfRealVsContaminationPerAllele = new double[alleleFrequencies.length];
        final double[] posteriorProbOfContaminationPerAllele = new double[alleleFrequencies.length];
        new IndexRange(0,alleleFrequencies.length).forEach(i -> {
            singleContaminantLikelihoodPerAllele[i] = 2 * alleleFrequencies[i] * (1 - alleleFrequencies[i]) * MathUtils.binomialProbability(totalAD,  altADs[i], contamination /2)
                    + MathUtils.square(alleleFrequencies[i]) * MathUtils.binomialProbability(totalAD,  altADs[i], contamination);
            manyContaminantLikelihoodPerAllele[i] = MathUtils.binomialProbability(totalAD, altADs[i], contamination * alleleFrequencies[i]);
            logContaminantLikelihoodPerAllele[i] = Math.log(Math.max(singleContaminantLikelihoodPerAllele[i], manyContaminantLikelihoodPerAllele[i]));
            logOddsOfRealVsContaminationPerAllele[i] = logSomaticLikelihoodPerAllele[i] - logContaminantLikelihoodPerAllele[i];
        });

        new IndexRange(0,alleleFrequencies.length).forEach(i -> {
            posteriorProbOfContaminationPerAllele[i] = filteringEngine.posteriorProbabilityOfError(vc, logOddsOfRealVsContaminationPerAllele[i], i);
            depthsAndPosteriorsPerAllele.get(i).add(ImmutablePair.of(altADs[i], posteriorProbOfContaminationPerAllele[i]));
        });

    }

    return depthsAndPosteriorsPerAllele.stream().map(alleleData -> alleleData.isEmpty() ? Double.NaN : weightedMedianPosteriorProbability(alleleData)).collect(Collectors.toList());
}
 
Example 11
Source File: CreateSomaticPanelOfNormals.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static final int altCount(final Genotype g) {
    return g.hasAD() ? (int) MathUtils.sum(g.getAD()) - g.getAD()[0] : 0;
}
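
For example, with made-up depths:

// Hypothetical genotype with AD = {12, 3, 5}:
//   MathUtils.sum(AD) = 20 and AD[0] = 12, so altCount(g) = 20 - 12 = 8 (combined alternate-allele depth)
// A genotype without AD contributes 0.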
 
Example 12
Source File: CustomMafFuncotationCreator.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Create the MAF count fields (See {@link CustomMafFuncotationCreator#COUNT_FIELD_NAMES}) for a single pair in the given variant.
 *
 * If no samples can be linked to a tumor and normal combination, then blank values are generated for the count fields.
 * @param variant Never {@code null}
 * @param tnPair Never {@code null}
 * @return List of funcotations for the pair.  This list will usually be of length one, unless multiallelics are involved.
 *  Never {@code null}.  Field values can be blank:
 *  - if there are no AD or AF format fields
 *  - if the specified pair is not in the variant sample list.
 */
private static List<Funcotation> createCustomMafCountFields(final VariantContext variant, final TumorNormalPair tnPair) {
    Utils.nonNull(variant);
    Utils.nonNull(tnPair);
    final List<Funcotation> result = new ArrayList<>();

    for (int i = 0; i < variant.getAlternateAlleles().size(); i++) {
        final Allele allele = variant.getAlternateAllele(i);
        final String tumorSampleName = tnPair.getTumor();
        final String normalSampleName = tnPair.getNormal();

        // This code assumes that the AD field is of Type "R"
        final Genotype tumorGenotype = variant.getGenotype(tumorSampleName);
        final boolean hasTumorAD = (tumorGenotype != null) && (tumorGenotype.hasAD());
        final boolean hasTumorAF = (tumorGenotype != null) && (tumorGenotype.hasAnyAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY));

        // Just make a quick check that the tumor genotype has allelic depth for both a ref and alt
        // This is REQUIRED, but can be missing and our validations seem to miss it:
        if ( hasTumorAD && tumorGenotype.getAD().length < 2 ) {
            throw new UserException("Allelic Depth (AD field) for Variant[" +
                    variant.getContig() + ":" + variant.getStart() + "_" +
                    variant.getReference().toString() + "->" + variant.getAlternateAlleles().get(0).toString() + ']' +
                    " Does not contain both a REF and an ALT value (only one value is present)!");
        }

        final String tAltCount = hasTumorAD ? Integer.toString(tumorGenotype.getAD()[i + 1]) : "";
        final String tRefCount = hasTumorAD ? Integer.toString(tumorGenotype.getAD()[0]) : "";
        final String tumorFAllAlleles = hasTumorAF ? tumorGenotype.getAnyAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY).toString() : "";
        final String tumorF = hasTumorAF ? StringUtils.split(tumorFAllAlleles, ",")[i] : "";

        final Genotype normalGenotype = variant.getGenotype(normalSampleName);
        final boolean hasNormalAD = (normalGenotype != null) && (normalGenotype.hasAD());
        final String nAltCount = hasNormalAD ? Integer.toString(normalGenotype.getAD()[i + 1]) : "";
        final String nRefCount = hasNormalAD ? Integer.toString(normalGenotype.getAD()[0]) : "";

        final List<String> fieldValues = Arrays.asList(
                tAltCount,
                tRefCount,
                nAltCount,
                nRefCount,
                tumorF);

        result.add(TableFuncotation.create(COUNT_FIELD_NAMES, fieldValues, allele, MafOutputRendererConstants.MAF_COUNT_RENDERING_DATASOURCE_DUMMY_NAME, createCustomMafCountFieldsMetadata()));
    }

    return result;
}