htsjdk.variant.variantcontext.GenotypesContext Java Examples

The following examples show how to use htsjdk.variant.variantcontext.GenotypesContext. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: VariantContextSingletonFilter.java    From Drop-seq with MIT License 6 votes vote down vote up
@Override
/**
 * For each variant context, look at the genotypes of the samples, and count the number of samples that have
 * an alternate allele.  If that number of samples!=1, return true to filter this record.
 * @param rec
 * @return
 */
public boolean filterOut(final VariantContext rec) {
	GenotypesContext gc = rec.getGenotypes();
	Iterator<Genotype> iter = gc.iterator();
	int count=0;
	while (iter.hasNext()) {
		Genotype g = iter.next();
		// boolean isHet = g.isHet();
		// boolean homRef = g.isHomRef();

		if (hetVarOnly & g.isHomVar())
			return true; // filter when het only and observe hom_var.
		if (g.isHet() || g.isHomVar())
			count++;
	}
	if (count==1) return false;
	return true;
}
 
Example #2
Source File: InbreedingCoeff.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@VisibleForTesting
static Pair<Integer, Double> calculateIC(final VariantContext vc, final GenotypesContext genotypes) {
    final GenotypeCounts t = GenotypeUtils.computeDiploidGenotypeCounts(vc, genotypes, ROUND_GENOTYPE_COUNTS);

    final double refCount = t.getRefs();
    final double hetCount = t.getHets();
    final double homCount = t.getHoms();
    // number of samples that have likelihoods
    final int sampleCount = (int) genotypes.stream().filter(g-> GenotypeUtils.isDiploidWithLikelihoods(g)).count();

    final double p = ( 2.0 * refCount + hetCount ) / ( 2.0 * (refCount + hetCount + homCount) ); // expected reference allele frequency
    final double q = 1.0 - p; // expected alternative allele frequency
    final double expectedHets = 2.0 * p * q * sampleCount; //numbers of hets that would be expected based on the allele frequency (asuming Hardy Weinberg Equilibrium)
    final double F = 1.0 - ( hetCount / expectedHets ); // inbreeding coefficient

    return Pair.of(sampleCount, F);
}
 
Example #3
Source File: InbreedingCoeff.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
public Map<String, Object> annotate(final ReferenceContext ref,
                                    final VariantContext vc,
                                    final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(vc);
    final GenotypesContext genotypes = getFounderGenotypes(vc);
    if (genotypes == null || genotypes.size() < MIN_SAMPLES || !vc.isVariant()) {
        logger.warn("InbreedingCoeff will not be calculated; at least " + MIN_SAMPLES + " samples must have called genotypes");
        return Collections.emptyMap();
    }
    final Pair<Integer, Double> sampleCountCoeff = calculateIC(vc, genotypes);
    final int sampleCount = sampleCountCoeff.getLeft();
    final double F = sampleCountCoeff.getRight();
    if (sampleCount < MIN_SAMPLES) {
        logger.warn("InbreedingCoeff will not be calculated for at least one position; at least " + MIN_SAMPLES + " samples must have called genotypes");
        return Collections.emptyMap();
    }
    return Collections.singletonMap(getKeyNames().get(0), String.format("%.4f", F));
}
 
Example #4
Source File: ExcessHet.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
public Map<String, Object> annotate(final ReferenceContext ref,
                                    final VariantContext vc,
                                    final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    GenotypesContext genotypes = getFounderGenotypes(vc);
    if (genotypes == null || !vc.isVariant()) {
        return Collections.emptyMap();
    }
    final Pair<Integer, Double> sampleCountEH = calculateEH(vc, genotypes);
    final int sampleCount = sampleCountEH.getLeft();
    final double eh =  sampleCountEH.getRight();

    if (sampleCount < 1) {
        return Collections.emptyMap();
    }
    return Collections.singletonMap(getKeyNames().get(0), (Object) String.format("%.4f", eh));
}
 
Example #5
Source File: AS_RMSMappingQuality.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private Map<Allele, Integer> getADcounts(final VariantContext vc) {
    final GenotypesContext genotypes = vc.getGenotypes();
    if ( genotypes == null || genotypes.size() == 0 ) {
        genotype_logger.warn("VC does not have genotypes -- annotations were calculated in wrong order");
        return null;
    }

    final Map<Allele, Integer> variantADs = new HashMap<>();
    for(final Allele a : vc.getAlleles())
        variantADs.put(a,0);

    for (final Genotype gt : vc.getGenotypes()) {
        if(gt.hasAD()) {
            final int[] ADs = gt.getAD();
            for (int i = 1; i < vc.getNAlleles(); i++) {
                variantADs.put(vc.getAlternateAllele(i - 1), variantADs.get(vc.getAlternateAllele(i - 1)) + ADs[i]); //here -1 is to reconcile allele index with alt allele index
            }
        }
    }
    return variantADs;
}
 
Example #6
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private VariantContext buildVariantContextWithDroppedAnnotationsRemoved(final VariantContext vc) {
    if (infoAnnotationsToDrop.isEmpty() && genotypeAnnotationsToDrop.isEmpty()) {
        return vc;
    }
    final VariantContextBuilder rmAnnotationsBuilder = new VariantContextBuilder(vc);
    for (String infoField : infoAnnotationsToDrop) {
        rmAnnotationsBuilder.rmAttribute(infoField);
    }
    if (!genotypeAnnotationsToDrop.isEmpty()) {
        final ArrayList<Genotype> genotypesToWrite = new ArrayList<>();
        for (Genotype genotype : vc.getGenotypes()) {
            final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).noAttributes();
            final Map<String, Object> attributes = new HashMap<>(genotype.getExtendedAttributes());
            for (String genotypeAnnotation : genotypeAnnotationsToDrop) {
                attributes.remove(genotypeAnnotation);
            }
            genotypeBuilder.attributes(attributes);
            genotypesToWrite.add(genotypeBuilder.make());
        }
        rmAnnotationsBuilder.genotypes(GenotypesContext.create(genotypesToWrite));
    }
    return rmAnnotationsBuilder.make();
}
 
Example #7
Source File: StrandBiasTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Create the contingency table by retrieving the per-sample strand bias annotation and adding them together
 * @param genotypes the genotypes from which to pull out the per-sample strand bias annotation
 * @param minCount minimum threshold for the sample strand bias counts for each ref and alt.
 *                 If both ref and alt counts are above minCount the whole sample strand bias is added to the resulting table
 * @return the table used for several strand bias tests, will be null if none of the genotypes contain the per-sample SB annotation
 */
protected int[][] getTableFromSamples( final GenotypesContext genotypes, final int minCount ) {
    if( genotypes == null ) {
        return null;
    }

    final int[] sbArray = {0,0,0,0}; // reference-forward-reverse -by- alternate-forward-reverse
    boolean foundData = false;

    for( final Genotype g : genotypes ) {
        if( ! g.hasAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY) ) {
            continue;
        }

        foundData = true;
        int[] data = getStrandCounts(g);

        if ( passesMinimumThreshold(data, minCount) ) {
            for( int index = 0; index < sbArray.length; index++ ) {
                sbArray[index] += data[index];
            }
        }
    }

    return foundData ? decodeSBBS(sbArray) : null ;
}
 
Example #8
Source File: MappingQualityZeroUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private VariantContext makeVC() {
    final GenotypesContext testGC = GenotypesContext.create(2);
    final Allele refAllele = Allele.create("A", true);
    final Allele altAllele = Allele.create("T");

    return (new VariantContextBuilder())
            .alleles(Arrays.asList(refAllele, altAllele)).chr("1").start(15L).stop(15L).genotypes(testGC).make();
}
 
Example #9
Source File: CoverageUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private VariantContext makeVC() {
    final GenotypesContext testGC = GenotypesContext.create(2);
    final Allele refAllele = Allele.create("A", true);
    final Allele altAllele = Allele.create("T");

    return (new VariantContextBuilder())
            .alleles(Arrays.asList(refAllele, altAllele)).chr("1").start(15L).stop(15L).genotypes(testGC).make();
}
 
Example #10
Source File: ExcessHet.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@VisibleForTesting
static Pair<Integer, Double> calculateEH(final VariantContext vc, final GenotypesContext genotypes) {
    final GenotypeCounts t = GenotypeUtils.computeDiploidGenotypeCounts(vc, genotypes, ROUND_GENOTYPE_COUNTS);
    // number of samples that have likelihoods
    final int sampleCount = (int) genotypes.stream().filter(g->GenotypeUtils.isDiploidWithLikelihoods(g)).count();

    return calculateEH(vc, t, sampleCount);
}
 
Example #11
Source File: AS_RankSumTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public Map<String, Object> annotate(final ReferenceContext ref,
                                    final VariantContext vc,
                                    final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(vc, "vc is null");

    final GenotypesContext genotypes = vc.getGenotypes();
    if (genotypes == null || genotypes.isEmpty()) {
        return Collections.emptyMap();
    }

    final List<Double> refQuals = new ArrayList<>();
    final List<Double> altQuals = new ArrayList<>();

    if( likelihoods != null) {
        fillQualsFromLikelihood(vc, likelihoods, refQuals, altQuals);
    }

    if ( refQuals.isEmpty() && altQuals.isEmpty() ) {
        return Collections.emptyMap();
    }

    final MannWhitneyU mannWhitneyU = new MannWhitneyU();

    // we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases)
    final MannWhitneyU.Result result = mannWhitneyU.test(Doubles.toArray(altQuals), Doubles.toArray(refQuals), MannWhitneyU.TestType.FIRST_DOMINATES);
    final double zScore = result.getZ();

    if (Double.isNaN(zScore)) {
        return Collections.emptyMap();
    } else {
        return Collections.singletonMap(getKeyNames().get(0), String.format("%.3f", zScore));
    }
}
 
Example #12
Source File: QualByDepth.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public static int getDepth(final GenotypesContext genotypes, final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    int depth = 0;
    int ADrestrictedDepth = 0;

    for ( final Genotype genotype : genotypes ) {
        // we care only about variant calls with likelihoods
        if ( !genotype.isHet() && !genotype.isHomVar() ) {
            continue;
        }

        // if we have the AD values for this sample, let's make sure that the variant depth is greater than 1!
        if ( genotype.hasAD() ) {
            final int[] AD = genotype.getAD();
            final int totalADdepth = (int) MathUtils.sum(AD);
            if ( totalADdepth != 0 ) {
                if (totalADdepth - AD[0] > 1) {
                    ADrestrictedDepth += totalADdepth;
                }
                depth += totalADdepth;
                continue;
            }
        }
        // if there is no AD value or it is a dummy value, we want to look to other means to get the depth
        if (likelihoods != null) {
            depth += likelihoods.sampleEvidenceCount(likelihoods.indexOfSample(genotype.getSampleName()));
        } else if ( genotype.hasDP() ) {
            depth += genotype.getDP();
        }
    }

    // if the AD-restricted depth is a usable value (i.e. not zero), then we should use that one going forward
    if ( ADrestrictedDepth > 0 ) {
        depth = ADrestrictedDepth;
    }
    return depth;
}
 
Example #13
Source File: QualByDepth.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public Map<String, Object> annotate(final ReferenceContext ref,
                                    final VariantContext vc,
                                    final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(vc);
    if ( !vc.hasLog10PError() ) {
        return Collections.emptyMap();
    }

    final GenotypesContext genotypes = vc.getGenotypes();
    if ( genotypes == null || genotypes.isEmpty() ) {
        return Collections.emptyMap();
    }

    final int depth = getDepth(genotypes, likelihoods);

    if ( depth == 0 ) {
        return Collections.emptyMap();
    }

    final double qual = -10.0 * vc.getLog10PError();
    double QD = qual / depth;

    // Hack: see note in the fixTooHighQD method below
    QD = fixTooHighQD(QD);

    return Collections.singletonMap(getKeyNames().get(0), String.format("%.2f", QD));
}
 
Example #14
Source File: CalculateGenotypePosteriors.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public void apply(final VariantContext variant,
                  final ReadsContext readsContext,
                  final ReferenceContext referenceContext,
                  final FeatureContext featureContext) {
    final Collection<VariantContext> vcs = featureContext.getValues(getDrivingVariantsFeatureInput());

    final Collection<VariantContext> otherVCs = featureContext.getValues(supportVariants);

    final int missing = supportVariants.size() - otherVCs.size();

    for ( final VariantContext vc : vcs ) {
        final VariantContext vc_familyPriors;
        final VariantContext vc_bothPriors;

        //do family priors first (if applicable)
        final VariantContextBuilder builder = new VariantContextBuilder(vc);
        //only compute family priors for biallelelic sites
        if (!skipFamilyPriors && vc.isBiallelic()){
            final GenotypesContext gc = famUtils.calculatePosteriorGLs(vc);
            builder.genotypes(gc);
        }
        VariantContextUtils.calculateChromosomeCounts(builder, false);
        vc_familyPriors = builder.make();

        if (!skipPopulationPriors) {
            vc_bothPriors = PosteriorProbabilitiesUtils.calculatePosteriorProbs(vc_familyPriors, otherVCs, missing * numRefIfMissing, globalPrior, !ignoreInputSamples, defaultToAC, useACoff);
        } else {
            final VariantContextBuilder builder2 = new VariantContextBuilder(vc_familyPriors);
            VariantContextUtils.calculateChromosomeCounts(builder, false);
            vc_bothPriors = builder2.make();
        }
        vcfWriter.add(vc_bothPriors);
    }
}
 
Example #15
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
 *
 * @param vc       the VariantContext record to subset
 * @param preserveAlleles should we trim constant sequence from the beginning and/or end of all alleles, or preserve it?
 * @param removeUnusedAlternates removes alternate alleles with AC=0
 * @return the subsetted VariantContext
 */
private VariantContext subsetRecord(final VariantContext vc, final boolean preserveAlleles, final boolean removeUnusedAlternates) {
    //subContextFromSamples() always decodes the vc, which is a fairly expensive operation.  Avoid if possible
    if (noSamplesSpecified && !removeUnusedAlternates) {
        return vc;
    }

    // strip out the alternate alleles that aren't being used
    final VariantContext sub = vc.subContextFromSamples(samples, removeUnusedAlternates);

    // If no subsetting happened, exit now
    if (sub.getNSamples() == vc.getNSamples() && sub.getNAlleles() == vc.getNAlleles()) {
        return vc;
    }

    // fix the PL and AD values if sub has fewer alleles than original vc and remove a fraction of the genotypes if needed
    final GenotypesContext oldGs = sub.getGenotypes();
    GenotypesContext newGC = sub.getNAlleles() == vc.getNAlleles() ? oldGs :
            AlleleSubsettingUtils.subsetAlleles(oldGs, 0, vc.getAlleles(), sub.getAlleles(), GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));

    if (fractionGenotypes > 0) {
        final List<Genotype> genotypes = newGC.stream().map(genotype -> randomGenotypes.nextDouble() > fractionGenotypes ? genotype :
                new GenotypeBuilder(genotype).alleles(getNoCallAlleles(genotype.getPloidy())).noGQ().make()).collect(Collectors.toList());
        newGC = GenotypesContext.create(new ArrayList<>(genotypes));
    }

    // since the VC has been subset (either by sample or allele), we need to strip out the MLE tags
    final VariantContextBuilder builder = new VariantContextBuilder(sub);
    builder.rmAttributes(Arrays.asList(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
    builder.genotypes(newGC);
    addAnnotations(builder, vc, sub.getSampleNames());
    final VariantContext subset = builder.make();

    return preserveAlleles? subset : GATKVariantContextUtils.trimAlleles(subset,true,true);
}
 
Example #16
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks if vc has a variant call for (at least one of) the samples.
 *
 * @param vc the variant rod VariantContext. Here, the variant is the dataset you're looking for discordances to (e.g. HapMap)
 * @param compVCs the comparison VariantContext (discordance)
 * @return true VariantContexts are discordant, false otherwise
 */
private boolean isDiscordant (final VariantContext vc, final Collection<VariantContext> compVCs) {
    if (vc == null) {
        return false;
    }

    // if we're not looking at specific samples then the absence of a compVC means discordance
    if (noSamplesSpecified) {
        return (compVCs == null || compVCs.isEmpty());
    }

    // check if we find it in the variant rod
    final GenotypesContext genotypes = vc.getGenotypes(samples);
    for (final Genotype g : genotypes) {
        if (sampleHasVariant(g)) {
            // There is a variant called (or filtered with not exclude filtered option set) that is not HomRef for at least one of the samples.
            if (compVCs == null) {
                return true;
            }
            // Look for this sample in the all vcs of the comp ROD track.
            boolean foundVariant = false;
            for (final VariantContext compVC : compVCs) {
                if (haveSameGenotypes(g, compVC.getGenotype(g.getSampleName()))) {
                    foundVariant = true;
                    break;
                }
            }
            // if (at least one sample) was not found in all VCs of the comp ROD, we have discordance
            if (!foundVariant)
                return true;
        }
    }
    return false; // we only get here if all samples have a variant in the comp rod.
}
 
Example #17
Source File: CalculateGenotypePosteriors.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public void apply(final VariantContext variant,
                  final ReadsContext readsContext,
                  final ReferenceContext referenceContext,
                  final FeatureContext featureContext) {

    final Collection<VariantContext> otherVCs = featureContext.getValues(supportVariants);

    //If no resource contains a matching variant, then add numRefIfMissing as a pseudocount to the priors
    List<VariantContext> resourcesWithMatchingStarts = otherVCs.stream()
            .filter(vc -> variant.getStart() == vc.getStart()).collect(Collectors.toList());

    //do family priors first (if applicable)
    final VariantContextBuilder builder = new VariantContextBuilder(variant);
    //only compute family priors for biallelelic sites
    if (!skipFamilyPriors && variant.isBiallelic()){
        final GenotypesContext gc = famUtils.calculatePosteriorGLs(variant);
        builder.genotypes(gc);
    }
    VariantContextUtils.calculateChromosomeCounts(builder, false);
    final VariantContext vc_familyPriors = builder.make();

    final VariantContext vc_bothPriors;
    if (!skipPopulationPriors) {
        vc_bothPriors = PosteriorProbabilitiesUtils.calculatePosteriorProbs(vc_familyPriors, resourcesWithMatchingStarts,
                resourcesWithMatchingStarts.isEmpty() ? numRefIfMissing : 0, options);
    } else {
        vc_bothPriors = vc_familyPriors;
    }
    vcfWriter.add(vc_bothPriors);
}
 
Example #18
Source File: BCFRecordWriter.java    From Hadoop-BAM with MIT License 5 votes vote down vote up
protected void writeRecord(VariantContext vc) {
	final GenotypesContext gc = vc.getGenotypes();
	if (gc instanceof LazyParsingGenotypesContext)
		((LazyParsingGenotypesContext)gc).getParser().setHeaderDataCache(
			gc instanceof LazyVCFGenotypesContext ? vcfHeaderDataCache
			                                      : bcfHeaderDataCache);

	writer.add(vc);
}
 
Example #19
Source File: VCFRecordWriter.java    From Hadoop-BAM with MIT License 5 votes vote down vote up
protected void writeRecord(VariantContext vc) {
	final GenotypesContext gc = vc.getGenotypes();
	if (gc instanceof LazyParsingGenotypesContext)
		((LazyParsingGenotypesContext)gc).getParser().setHeaderDataCache(
			gc instanceof LazyVCFGenotypesContext ? vcfHeaderDataCache
			                                      : bcfHeaderDataCache);

	writer.add(vc);
}
 
Example #20
Source File: MakeSitesOnlyVcf.java    From picard with MIT License 5 votes vote down vote up
/** Makes a new VariantContext with only the desired samples. */
private static VariantContext subsetToSamplesWithOriginalAnnotations(final VariantContext ctx, final Set<String> samples) {
    final VariantContextBuilder builder = new VariantContextBuilder(ctx);
    final GenotypesContext newGenotypes = ctx.getGenotypes().subsetToSamples(samples);
    builder.alleles(ctx.getAlleles());
    return builder.genotypes(newGenotypes).make();
}
 
Example #21
Source File: RankSumTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public Map<String, Object> annotate(final ReferenceContext ref,
                                    final VariantContext vc,
                                    final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(vc, "vc is null");

    final GenotypesContext genotypes = vc.getGenotypes();
    if (genotypes == null || genotypes.isEmpty()) {
        return Collections.emptyMap();
    }

    final List<Double> refQuals = new ArrayList<>();
    final List<Double> altQuals = new ArrayList<>();

    if( likelihoods != null) {
        fillQualsFromLikelihood(vc, likelihoods, refQuals, altQuals);
    }


    if ( refQuals.isEmpty() && altQuals.isEmpty() ) {
        return Collections.emptyMap();
    }

    final MannWhitneyU mannWhitneyU = new MannWhitneyU();

    // we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases)
    final MannWhitneyU.Result result = mannWhitneyU.test(Doubles.toArray(altQuals), Doubles.toArray(refQuals), MannWhitneyU.TestType.FIRST_DOMINATES);
    final double zScore = result.getZ();

    if (Double.isNaN(zScore)) {
        return Collections.emptyMap();
    } else {
        return Collections.singletonMap(getKeyNames().get(0), String.format("%.3f", zScore));
    }
}
 
Example #22
Source File: LiftoverUtils.java    From picard with MIT License 5 votes vote down vote up
protected static GenotypesContext fixGenotypes(final GenotypesContext originals, final List<Allele> originalAlleles, final List<Allele> newAlleles) {
    // optimization: if nothing needs to be fixed then don't bother
    if (originalAlleles.equals(newAlleles)) {
        return originals;
    }

    if (originalAlleles.size() != newAlleles.size()) {
        throw new IllegalStateException("Error in allele lists: the original and new allele lists are not the same length: " + originalAlleles.toString() + " / " + newAlleles.toString());
    }

    final Map<Allele, Allele> alleleMap = new HashMap<>();
    for (int idx = 0; idx < originalAlleles.size(); idx++) {
        alleleMap.put(originalAlleles.get(idx), newAlleles.get(idx));
    }

    final GenotypesContext fixedGenotypes = GenotypesContext.create(originals.size());
    for (final Genotype genotype : originals) {
        final List<Allele> fixedAlleles = new ArrayList<>();
        for (final Allele allele : genotype.getAlleles()) {
            if (allele.isNoCall()) {
                fixedAlleles.add(allele);
            } else {
                Allele newAllele = alleleMap.get(allele);
                if (newAllele == null) {
                    throw new IllegalStateException("Allele not found: " + allele.toString() + ", " + originalAlleles + "/ " + newAlleles);
                }
                fixedAlleles.add(newAllele);
            }
        }
        fixedGenotypes.add(new GenotypeBuilder(genotype).alleles(fixedAlleles).make());
    }
    return fixedGenotypes;
}
 
Example #23
Source File: VcfTestUtils.java    From picard with MIT License 5 votes vote down vote up
public static void assertEquals(final GenotypesContext actual, final GenotypesContext expected) {
    if (expected == null) {
        Assert.assertNull(actual);
        return;
    }
    Assert.assertEquals(actual.getSampleNamesOrderedByName(), expected.getSampleNamesOrderedByName(), "Sample names differ");

    for (final String name : expected.getSampleNamesOrderedByName()) {
        Assert.assertEquals(actual.get(name).getAlleles(), expected.get(name).getAlleles(), "Alleles differ for sample " + name);
        Assert.assertEquals(actual.get(name).getAD(), expected.get(name).getAD());
        Assert.assertEquals(actual.get(name).getPL(), expected.get(name).getPL());
    }
}
 
Example #24
Source File: AS_QualByDepth.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Uses the "AS_QUAL" key, which must be computed by the genotyping engine in GenotypeGVCFs, to
 * calculate the final AS_QD annotation on the read.
 *
 * @param vc -- contains the final set of alleles, possibly subset by GenotypeGVCFs
 * @param originalVC -- used to get all the alleles for all gVCFs
 * @return
 */
@Override
public Map<String, Object> finalizeRawData(VariantContext vc, VariantContext originalVC) {
    //we need to use the AS_QUAL value that was added to the VC by the GenotypingEngine
    if ( !vc.hasAttribute(GATKVCFConstants.AS_QUAL_KEY) && !vc.hasAttribute(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY)) {
        return null;
    }

    final GenotypesContext genotypes = vc.getGenotypes();
    if ( genotypes == null || genotypes.isEmpty() ) {
        return null;
    }

    final List<Integer> standardDepth;
    if (originalVC.hasAttribute(GATKVCFConstants.AS_VARIANT_DEPTH_KEY)) {
        standardDepth = Arrays.stream(originalVC.getAttributeAsString(GATKVCFConstants.AS_VARIANT_DEPTH_KEY, "")
                .split(AnnotationUtils.ALLELE_SPECIFIC_SPLIT_REGEX)).mapToInt(Integer::parseInt).boxed().collect(Collectors.toList());
    } else {
        standardDepth = getAlleleDepths(genotypes);
    }
    if (standardDepth == null) { //all no-calls and homRefs
        return null;
    }

    List<Integer> alleleQualList = parseQualList(vc);


    // Don't normalize indel length for AS_QD because it will only be called from GenotypeGVCFs, never UG
    List<Double> QDlist = new ArrayList<>();
    double refDepth = (double)standardDepth.get(0);
    for (int i = 0; i < alleleQualList.size(); i++) {
        double AS_QD = alleleQualList.get(i) / ((double)standardDepth.get(i+1) + refDepth); //+1 to skip the reference field of the AD, add ref counts to each to match biallelic case
        // Hack: see note in the fixTooHighQD method below
        AS_QD = QualByDepth.fixTooHighQD(AS_QD);
        QDlist.add(AS_QD);
    }

    final Map<String, Object> map = new HashMap<>();
    map.put(getKeyNames().get(0), AnnotationUtils.encodeValueList(QDlist, "%.2f"));
    if (vc.hasAttribute(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY)) {
        //keep AS_QUALapprox for Gnarly Pipeline because we don't subset alts or output genotypes if there are more than 6 alts
        map.put(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY, StringUtils.join(alleleQualList, AnnotationUtils.LIST_DELIMITER));
    }
    return map;
}
 
Example #25
Source File: CombineGenotypingArrayVcfs.java    From picard with MIT License 4 votes vote down vote up
/**
 * Merges multiple VariantContexts all for the same locus into a single hybrid.
 *
 * @param variantContexts           list of VCs
 * @return new VariantContext       representing the merge of variantContexts
 */
public static VariantContext merge(final List<VariantContext> variantContexts) {
    if ( variantContexts == null || variantContexts.isEmpty() )
        return null;

    // establish the baseline info from the first VC
    final VariantContext first = variantContexts.get(0);
    final String name = first.getSource();

    final Set<String> filters = new HashSet<>();

    int depth = 0;
    double log10PError = CommonInfo.NO_LOG10_PERROR;
    boolean anyVCHadFiltersApplied = false;
    GenotypesContext genotypes = GenotypesContext.create();

    // Go through all the VCs, verify that the loci and ID and other attributes agree.
    final Map<String, Object> firstAttributes = first.getAttributes();
    for (final VariantContext vc : variantContexts ) {
        if ((vc.getStart() != first.getStart()) || (!vc.getContig().equals(first.getContig()))) {
            throw new PicardException("Mismatch in loci among input VCFs");
        }
        if (!vc.getID().equals(first.getID())) {
            throw new PicardException("Mismatch in ID field among input VCFs");
        }
        if (!vc.getReference().equals(first.getReference())) {
            throw new PicardException("Mismatch in REF allele among input VCFs");
        }
        checkThatAllelesMatch(vc, first);

        genotypes.addAll(vc.getGenotypes());

        // We always take the QUAL of the first VC with a non-MISSING qual for the combined value
        if ( log10PError == CommonInfo.NO_LOG10_PERROR )
            log10PError =  vc.getLog10PError();

        filters.addAll(vc.getFilters());
        anyVCHadFiltersApplied |= vc.filtersWereApplied();

        // add attributes
        // special case DP (add it up)
        if (vc.hasAttribute(VCFConstants.DEPTH_KEY))
            depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);

        // Go through all attributes - if any differ (other than AC, AF, or AN) that's an error.  (recalc AC, AF, and AN later)
        for (final Map.Entry<String, Object> p : vc.getAttributes().entrySet()) {
            final String key = p.getKey();
            if ((!key.equals("AC")) && (!key.equals("AF")) && (!key.equals("AN")) && (!key.equals("devX_AB")) && (!key.equals("devY_AB"))) {
                final Object value = p.getValue();
                final Object extantValue = firstAttributes.get(key);
                if (extantValue == null) {
                    // attribute in one VCF but not another.  Die!
                    throw new PicardException("Attribute '" + key + "' not found in all VCFs");
                }
                else if (!extantValue.equals(value)) {
                    // Attribute disagrees in value between one VCF Die! (if not AC, AF, nor AN, skipped above)
                    throw new PicardException("Values for attribute '" + key + "' disagrees among input VCFs");
                }
            }
        }
    }

    if (depth > 0)
        firstAttributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));

    final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(first.getID());
    builder.loc(first.getContig(), first.getStart(), first.getEnd());
    builder.alleles(first.getAlleles());
    builder.genotypes(genotypes);

    builder.attributes(new TreeMap<>(firstAttributes));
    // AC, AF, and AN are recalculated here
    VariantContextUtils.calculateChromosomeCounts(builder, false);

    builder.log10PError(log10PError);
    if ( anyVCHadFiltersApplied ) {
        builder.filters(filters.isEmpty() ? filters : new TreeSet<>(filters));
    }

    return builder.make();
}
 
Example #26
Source File: LiftoverUtils.java    From picard with MIT License 4 votes vote down vote up
/**
 * method to swap the reference and alt alleles of a bi-allelic, SNP
 *
 * @param vc                   the {@link VariantContext} (bi-allelic SNP) that needs to have it's REF and ALT alleles swapped.
 * @param annotationsToReverse INFO field annotations (of double value) that will be reversed (x->1-x)
 * @param annotationsToDrop    INFO field annotations that will be dropped from the result since they are invalid when REF and ALT are swapped
 * @return a new {@link VariantContext} with alleles swapped, INFO fields modified and in the genotypes, GT, AD and PL corrected appropriately
 */
public static VariantContext swapRefAlt(final VariantContext vc, final Collection<String> annotationsToReverse, final Collection<String> annotationsToDrop) {

    if (!vc.isBiallelic() || !vc.isSNP()) {
        throw new IllegalArgumentException("swapRefAlt can only process biallelic, SNPS, found " + vc.toString());
    }

    final VariantContextBuilder swappedBuilder = new VariantContextBuilder(vc);

    swappedBuilder.attribute(SWAPPED_ALLELES, true);

    // Use getBaseString() (rather than the Allele itself) in order to create new Alleles with swapped
    // reference and non-variant attributes
    swappedBuilder.alleles(Arrays.asList(vc.getAlleles().get(1).getBaseString(), vc.getAlleles().get(0).getBaseString()));

    final Map<Allele, Allele> alleleMap = new HashMap<>();

    // A mapping from the old allele to the new allele, to be used when fixing the genotypes
    alleleMap.put(vc.getAlleles().get(0), swappedBuilder.getAlleles().get(1));
    alleleMap.put(vc.getAlleles().get(1), swappedBuilder.getAlleles().get(0));

    final GenotypesContext swappedGenotypes = GenotypesContext.create(vc.getGenotypes().size());
    for (final Genotype genotype : vc.getGenotypes()) {
        final List<Allele> swappedAlleles = new ArrayList<>();
        for (final Allele allele : genotype.getAlleles()) {
            if (allele.isNoCall()) {
                swappedAlleles.add(allele);
            } else {
                swappedAlleles.add(alleleMap.get(allele));
            }
        }
        // Flip AD
        final GenotypeBuilder builder = new GenotypeBuilder(genotype).alleles(swappedAlleles);
        if (genotype.hasAD() && genotype.getAD().length == 2) {
            final int[] ad = ArrayUtils.clone(genotype.getAD());
            ArrayUtils.reverse(ad);
            builder.AD(ad);
        } else {
            builder.noAD();
        }

        //Flip PL
        if (genotype.hasPL() && genotype.getPL().length == 3) {
            final int[] pl = ArrayUtils.clone(genotype.getPL());
            ArrayUtils.reverse(pl);
            builder.PL(pl);
        } else {
            builder.noPL();
        }
        swappedGenotypes.add(builder.make());
    }
    swappedBuilder.genotypes(swappedGenotypes);

    for (final String key : vc.getAttributes().keySet()) {
        if (annotationsToDrop.contains(key)) {
            swappedBuilder.rmAttribute(key);
        } else if (annotationsToReverse.contains(key) && !vc.getAttributeAsString(key, "").equals(VCFConstants.MISSING_VALUE_v4)) {
            final double attributeToReverse = vc.getAttributeAsDouble(key, -1);

            if (attributeToReverse < 0 || attributeToReverse > 1) {
                log.warn("Trying to reverse attribute " + key +
                        " but found value that isn't between 0 and 1: (" + attributeToReverse + ") in variant " + vc + ". Results might be wrong.");
            }
            swappedBuilder.attribute(key, 1 - attributeToReverse);
        }
    }

    return swappedBuilder.make();
}
 
Example #27
Source File: FisherStrand.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Override
protected Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes){
    final int[][] tableFromPerSampleAnnotations = getTableFromSamples(genotypes, MIN_COUNT);
    return ( tableFromPerSampleAnnotations != null )? annotationForOneTable(pValueForContingencyTable(tableFromPerSampleAnnotations)) : null;
}
 
Example #28
Source File: PedigreeAnnotation.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
protected GenotypesContext getFounderGenotypes(VariantContext vc) {
    if ((pedigreeFile!= null) && (!hasAddedPedigreeFounders)) {
        initializeSampleDBAndSetFounders(pedigreeFile);
    }
    return (founderIds == null || founderIds.isEmpty()) ? vc.getGenotypes() : vc.getGenotypes(new HashSet<>(founderIds));
}
 
Example #29
Source File: AS_StrandBiasTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Override
//Allele-specific annotations cannot be called from walkers other than HaplotypeCaller
protected Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes){
    return Collections.emptyMap();
}
 
Example #30
Source File: StrandOddsRatio.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Override
protected Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes){
    final int[][] tableFromPerSampleAnnotations = getTableFromSamples(genotypes, MIN_COUNT);
    return tableFromPerSampleAnnotations != null ? annotationForOneTable(calculateSOR(tableFromPerSampleAnnotations)) : null;
}