htsjdk.variant.variantcontext.Genotype Java Examples

The following examples show how to use htsjdk.variant.variantcontext.Genotype. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TLODBlock.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Add information from this Genotype to this band.
 *
 * @param pos Current genomic position. Must be 1 base after the previous position
 * @param genotype A non-null Genotype with TLOD and DP attributes
 */
@Override
public void add(final int pos, final int newEnd, final Genotype genotype) {
    Utils.nonNull(genotype, "genotype cannot be null");
    Utils.validateArg( pos == end + 1,"adding genotype at pos " + pos + " isn't contiguous with previous end " + end);
    // Make sure the LOD is within the bounds of this band
    final double currentLOD = Double.parseDouble(genotype.getExtendedAttribute(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY).toString());
    if ( !withinBounds(currentLOD)) {
        throw new IllegalArgumentException("cannot add a genotype with LOD=" + currentLOD + " because it's not within bounds ["
                + this.getLODLowerBound() + ',' + this.getLODUpperBound() + ')');
    }

    if( minBlockLOD == Double.POSITIVE_INFINITY || currentLOD < minBlockLOD) {
        minBlockLOD = currentLOD;
    }

    end = newEnd;
    DPs.add(Math.max(genotype.getDP(), 0)); // DP must be >= 0
}
 
Example #2
Source File: StrandBiasTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@SuppressWarnings("unchecked")
public static int[] getStrandCounts(final Genotype g) {
    int[] data;
    if ( g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY).getClass().equals(String.class)) {
        final String sbbsString = (String)g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY);
        data = encodeSBBS(sbbsString);
    } else if (g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY).getClass().equals(ArrayList.class)) {
       if (((ArrayList<Object>)g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)).get(0) instanceof Integer) {
            data = encodeSBBS((ArrayList<Integer>) g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY));
        }
        else if (((ArrayList<Object>)g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)).get(0) instanceof String) {
            final List<Integer> sbbsList = new ArrayList<>();
            for (final Object o : (ArrayList<Object>) g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)) {
                sbbsList.add(Integer.parseInt(o.toString()));
            }
            data = encodeSBBS(sbbsList);
        } else {
            throw new GATKException("Unexpected " + GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY + " type");
        }

    } else {
        throw new GATKException("Unexpected " + GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY + " type");
    }
    return data;
}
 
Example #3
Source File: AmberVCF.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
@NotNull
private VariantContext create(@NotNull final BaseDepth snp) {
    final List<Allele> alleles = Lists.newArrayList();
    alleles.add(Allele.create(snp.ref().toString(), true));
    alleles.add(Allele.create(snp.alt().toString(), false));

    final List<Integer> adField = Lists.newArrayList();
    adField.add(snp.refSupport());
    adField.add(snp.altSupport());

    final Genotype normal = new GenotypeBuilder(config.primaryReference()).DP(snp.readDepth())
            .AD(adField.stream().mapToInt(i -> i).toArray())
            .alleles(alleles)
            .make();

    final VariantContextBuilder builder = new VariantContextBuilder().chr(snp.chromosome())
            .start(snp.position())
            .computeEndFromAlleles(alleles, (int) snp.position())
            .genotypes(normal)
            .alleles(alleles);

    return builder.make();
}
 
Example #4
Source File: EvaluateCopyNumberTriStateCalls.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Calculate the truth call quality when we collapse copy-numbers into deletion, neutral (ref) and duplication.
 *
 * @param truthGenotype the genotype containing the posteriors log10 to use for the calculation.
 * @param truthCopyNumber the truth call copy number.
 * @return never {@code null}, a Phred scaled quality score 0 or greater.
 */
private double calculateTruthQuality(final Genotype truthGenotype, final int truthCopyNumber) {
    final double[] truthPosteriors = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(truthGenotype,
            GS_COPY_NUMBER_POSTERIOR_KEY, () -> new double[truthCopyNumber + 1],
            Double.NEGATIVE_INFINITY);
    final double totalLog10Prob = MathUtils.log10SumLog10(truthPosteriors);
    final double delLog10Prob = MathUtils.log10SumLog10(truthPosteriors, 0, Math.min(truthNeutralCopyNumber, truthPosteriors.length)) - totalLog10Prob;
    final double neutralLog10Prob = truthNeutralCopyNumber >= truthPosteriors.length ? Double.NEGATIVE_INFINITY : truthPosteriors[truthNeutralCopyNumber] - totalLog10Prob;
    final double dupLog10Prob = truthNeutralCopyNumber + 1 >= truthPosteriors.length ? Double.NEGATIVE_INFINITY
            : MathUtils.log10SumLog10(truthPosteriors, truthNeutralCopyNumber + 1, truthPosteriors.length) - totalLog10Prob;
    if (truthCopyNumber < truthNeutralCopyNumber) {
        return -10.0 * MathUtils.approximateLog10SumLog10(neutralLog10Prob, dupLog10Prob);
    } else if (truthCopyNumber > truthNeutralCopyNumber) {
        return -10.0 * MathUtils.approximateLog10SumLog10(neutralLog10Prob, delLog10Prob);
    } else {
        return -10.0 * MathUtils.approximateLog10SumLog10(delLog10Prob, dupLog10Prob);
    }
}
 
Example #5
Source File: SageVariantContextFactory.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
@NotNull
private static Genotype createGenotype(boolean germline, @NotNull final VariantHotspot variant,
        @NotNull final ReadContextCounter counter) {
    return new GenotypeBuilder(counter.sample()).DP(counter.depth())
            .AD(new int[] { counter.refSupport(), counter.altSupport() })
            .attribute(READ_CONTEXT_QUALITY, counter.quality())
            .attribute(READ_CONTEXT_COUNT, counter.counts())
            .attribute(READ_CONTEXT_IMPROPER_PAIR, counter.improperPair())
            .attribute(READ_CONTEXT_JITTER, counter.jitter())
            .attribute(RAW_ALLELIC_DEPTH, new int[] { counter.rawRefSupport(), counter.rawAltSupport() })
            .attribute(RAW_ALLELIC_BASE_QUALITY, new int[] { counter.rawRefBaseQuality(), counter.rawAltBaseQuality() })
            .attribute(RAW_DEPTH, counter.rawDepth())
            .attribute(VCFConstants.ALLELE_FREQUENCY_KEY, counter.vaf())
            .alleles(createGenotypeAlleles(germline, variant, counter))
            .make();
}
 
Example #6
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Initialize cache of allele anyploid indices
 *
 * Initialize the cache of PL index to a list of alleles for each ploidy.
 *
 * @param vc    Variant Context
 */
private void initalizeAlleleAnyploidIndicesCache(final VariantContext vc) {
    if (vc.getType() != VariantContext.Type.NO_VARIATION) { // Bypass if not a variant
        for (final Genotype g : vc.getGenotypes()) {
            // Make a new entry if the we have not yet cached a PL to allele indices map for this ploidy and allele count
            // skip if there are no PLs -- this avoids hanging on high-allelic somatic samples, for example, where
            // there's no need for the PL indices since they don't exist
            if (g.getPloidy() != 0 && (!ploidyToNumberOfAlleles.containsKey(g.getPloidy()) || ploidyToNumberOfAlleles.get(g.getPloidy()) < vc.getNAlleles())) {
                if (vc.getGenotypes().stream().anyMatch(Genotype::hasLikelihoods)) {
                    GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(vc.getNAlleles() - 1, g.getPloidy());
                    ploidyToNumberOfAlleles.put(g.getPloidy(), vc.getNAlleles());
                }
            }
        }
    }

}
 
Example #7
Source File: StrelkaAllelicDepth.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
@NotNull
private static int[] readAD(@NotNull final Genotype tumorGenotype, @NotNull final List<Allele> alleles,
        @NotNull Function<Allele, String> alleleKey) {
    final List<Integer> alleleAds = alleles.stream().map(allele -> {
        final String[] alleleADs = tumorGenotype.getExtendedAttribute(alleleKey.apply(allele), "0").toString().split(SEPARATOR);
        if (alleleADs.length > 0) {
            try {
                return Integer.parseInt(alleleADs[0]);
            } catch (final NumberFormatException e) {
                return 0;
            }
        }
        return 0;
    }).collect(Collectors.toList());
    return alleleAds.stream().mapToInt(Integer::intValue).toArray();
}
 
Example #8
Source File: StrelkaAllelicDepth.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
@NotNull
public static VariantContext enrich(@NotNull final VariantContext context) {
    if (!context.isIndel() && !context.isSNP() ) {
        return context;
    }

    final VariantContextBuilder contextBuilder = new VariantContextBuilder(context).noGenotypes();

    final List<Allele> alleles = context.getAlleles();
    final Function<Allele, String> alleleKey = alleleKey(context);

    final List<Genotype> updatedGenotypes = Lists.newArrayList();
    for (Genotype genotype : context.getGenotypes()) {
        if (!genotype.hasAD() && hasRequiredAttributes(genotype, alleles, alleleKey)) {
            final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).AD(readAD(genotype, alleles, alleleKey));
            updatedGenotypes.add(genotypeBuilder.make());
        } else {
            updatedGenotypes.add(genotype);
        }
    }

    return contextBuilder.genotypes(updatedGenotypes).make();
}
 
Example #9
Source File: DepthPerAlleleBySample.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
public void annotate(final ReferenceContext ref,
                     final VariantContext vc,
                     final Genotype g,
                     final GenotypeBuilder gb,
                     final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(gb, "gb is null");
    Utils.nonNull(vc, "vc is null");

    if ( g == null || !g.isCalled() || likelihoods == null) {
        return;
    }
    final Set<Allele> alleles = new LinkedHashSet<>(vc.getAlleles());

    // make sure that there's a meaningful relationship between the alleles in the likelihoods and our VariantContext
    Utils.validateArg(likelihoods.alleles().containsAll(alleles), () -> "VC alleles " + alleles + " not a  subset of AlleleLikelihoods alleles " + likelihoods.alleles());

    gb.AD(annotateWithLikelihoods(vc, g, alleles, likelihoods));
}
 
Example #10
Source File: Mutect2FilteringEngine.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void applyStrandArtifactFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final Collection<String> filters) {
    Genotype tumorGenotype = vc.getGenotype(tumorSample);
    final double[] posteriorProbabilities = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(
            tumorGenotype, (StrandArtifact.POSTERIOR_PROBABILITIES_KEY), () -> null, -1);
    final double[] mapAlleleFractionEstimates = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(
            tumorGenotype, (StrandArtifact.MAP_ALLELE_FRACTIONS_KEY), () -> null, -1);

    if (posteriorProbabilities == null || mapAlleleFractionEstimates == null){
        return;
    }

    final int maxZIndex = MathUtils.maxElementIndex(posteriorProbabilities);

    if (maxZIndex == StrandArtifact.StrandArtifactZ.NO_ARTIFACT.ordinal()){
        return;
    }

    if (posteriorProbabilities[maxZIndex] > MTFAC.STRAND_ARTIFACT_POSTERIOR_PROB_THRESHOLD &&
            mapAlleleFractionEstimates[maxZIndex] < MTFAC.STRAND_ARTIFACT_ALLELE_FRACTION_THRESHOLD){
        filters.add(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME);
    }
}
 
Example #11
Source File: XHMMSegmentGenotyperIntegrationTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void assertVariantsAreCoveredBySegments(final List<VariantContext> variants,
                                                final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> variantSegments) {
    for (final VariantContext variant : variants) {
        final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> matches =
                variantSegments.stream()
                .filter(s -> new SimpleInterval(variant).equals(s.getSegment().getInterval()))
                .collect(Collectors.toList());
        Assert.assertFalse(matches.isEmpty());
        for (final Genotype genotype : variant.getGenotypes()) {
            final boolean discovery = genotype.getExtendedAttribute(XHMMSegmentGenotyper.DISCOVERY_KEY).toString().equals(XHMMSegmentGenotyper.DISCOVERY_TRUE);
            if (discovery) {
                Assert.assertTrue(matches.stream().anyMatch(s -> s.getSampleName().equals(genotype.getSampleName())));
            } else {
                Assert.assertTrue(matches.stream().noneMatch(s -> s.getSampleName().equals(genotype.getSampleName())));
            }
        }
    }
}
 
Example #12
Source File: StrandBiasTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
//template method for calculating strand bias annotations using the three different methods
public Map<String, Object> annotate(final ReferenceContext ref,
                                    final VariantContext vc,
                                    final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(vc);
    if ( !vc.isVariant() ) {
        return Collections.emptyMap();
    }

    // if the genotype and strand bias are provided, calculate the annotation from the Genotype (GT) field
    if ( vc.hasGenotypes() ) {
        for (final Genotype g : vc.getGenotypes()) {
            if (g.hasAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)) {
                return calculateAnnotationFromGTfield(vc.getGenotypes());
            }
        }
    }

    if (likelihoods != null) {
        return calculateAnnotationFromLikelihoods(likelihoods, vc);
    }
    return Collections.emptyMap();
}
 
Example #13
Source File: ConvertGSVariantsToSegmentsIntegrationTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> composeExpectedSegments(final File vcf, final TargetCollection<Target> targets) throws IOException {
    final VCFFileReader reader = new VCFFileReader(vcf, false);
    final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> result = new ArrayList<>();
    reader.iterator().forEachRemaining(vc -> {
        final int targetCount = targets.indexRange(vc).size();
        for (final Genotype genotype : vc.getGenotypes()) {
            final int cn = Integer.parseInt(genotype.getExtendedAttribute("CN").toString());
            final double[] cnp = Stream.of(genotype.getExtendedAttribute("CNP").toString().replaceAll("\\[\\]", "").split(","))
                    .mapToDouble(Double::parseDouble).toArray();

            final double cnpSum = MathUtils.approximateLog10SumLog10(cnp);
            final CopyNumberTriState call = expectedCall(cn);
            final double exactLog10Prob = expectedExactLog10(call, cnp);
            final HiddenStateSegment<CopyNumberTriState, Target> expectedSegment = new HiddenStateSegment<>(
                    new SimpleInterval(vc), targetCount, Double.parseDouble(genotype.getExtendedAttribute("CNF").toString()),
                    0.000, call, -10.0 * exactLog10Prob, Double.NaN,  Double.NaN, Double.NaN,
                      -10.0 * (cnp[ConvertGSVariantsToSegments.NEUTRAL_COPY_NUMBER_DEFAULT] - cnpSum)
            );
            result.add(new HiddenStateSegmentRecord<>(genotype.getSampleName(), expectedSegment));
        }
    });
    return result;
}
 
Example #14
Source File: VariantContextSingletonFilterTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test
public void filterOutHetSinglton() {
 VariantContextBuilder b = new VariantContextBuilder();
 Allele a1 = Allele.create("A", true);
 Allele a2 = Allele.create("T", false);

 final List<Allele> allelesHet = new ArrayList<>(Arrays.asList(a1,a2));
 final List<Allele> allelesRef = new ArrayList<>(Arrays.asList(a1,a1));
 final List<Allele> allelesAlt = new ArrayList<>(Arrays.asList(a2,a2));

 // this has a het singleton.
 Collection<Genotype> genotypes = new ArrayList<>();
 genotypes.add(GenotypeBuilder.create("donor1", allelesRef));
 genotypes.add(GenotypeBuilder.create("donor2", allelesHet));
 genotypes.add(GenotypeBuilder.create("donor3", allelesRef));

 VariantContext vc = b.alleles(allelesHet).start(1).stop(1).chr("1").genotypes(genotypes).make();
 Assert.assertNotNull(vc);

 Iterator<VariantContext> underlyingIterator = Collections.emptyIterator();

 VariantContextSingletonFilter f = new VariantContextSingletonFilter(underlyingIterator, true);
 boolean t1 = f.filterOut(vc);
 Assert.assertFalse(t1);

}
 
Example #15
Source File: XHMMSegmentGenotyperIntegrationTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private void assertVariantsInfoFieldsAreConsistent(final List<VariantContext> variants) {
    for (final VariantContext variant : variants) {
        final int expectedAN = variant.getGenotypes().size();
        final int[] expectedAC = new int[CopyNumberTriState.values().length];
        final List<Allele> alleles = variant.getAlleles();
        for (final Genotype genotype : variant.getGenotypes()) {
            Assert.assertEquals(genotype.getAlleles().size(), 1);
            final int alleleIndex = alleles.indexOf(genotype.getAllele(0));
            Assert.assertTrue(alleleIndex >= 0);
            expectedAC[alleleIndex]++;
        }
        Assert.assertEquals(variant.getAlleles(), CopyNumberTriStateAllele.ALL_ALLELES);
        Assert.assertTrue(variant.hasAttribute(XHMMSegmentGenotyper.NUMBER_OF_TARGETS_KEY));
        Assert.assertTrue(variant.hasAttribute(VCFConstants.ALLELE_COUNT_KEY));
        Assert.assertTrue(variant.hasAttribute(VCFConstants.END_KEY));
        Assert.assertTrue(variant.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY));
        Assert.assertTrue(variant.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY));
        final int expectedANAnotherWay = IntStream.of(expectedAC).sum();
        Assert.assertEquals(expectedANAnotherWay, expectedAN);
        Assert.assertEquals(variant.getAttributeAsInt(VCFConstants.ALLELE_NUMBER_KEY, -1), expectedAN);
        final double[] expectedAF = IntStream.of(expectedAC).mapToDouble(c -> c / (double) expectedAN).toArray();
        final double[] observedAFWithoutRef = variant.getAttributeAsList(VCFConstants.ALLELE_FREQUENCY_KEY).stream()
                .mapToDouble(o -> Double.parseDouble(String.valueOf(o))).toArray();
        Assert.assertEquals(observedAFWithoutRef.length, expectedAF.length - 1);
        for (int i = 0; i < observedAFWithoutRef.length; i++) {
            Assert.assertEquals(observedAFWithoutRef[i], expectedAF[i+1], 0.001);
        }
        final int[] observedACWithoutRef = variant.getAttributeAsList(VCFConstants.ALLELE_COUNT_KEY).stream()
                .mapToInt(o -> Integer.parseInt(String.valueOf(o))).toArray();
        Assert.assertEquals(observedACWithoutRef.length, expectedAC.length - 1);
        for (int i = 0; i < observedACWithoutRef.length; i++) {
            Assert.assertEquals(observedACWithoutRef[i], expectedAC[i+1]);
        }
        Assert.assertEquals(variant.getAttributeAsInt(XHMMSegmentGenotyper.NUMBER_OF_TARGETS_KEY, -1),
                XHMMSegmentCallerBaseIntegrationTest.REALISTIC_TARGETS.targetCount(variant));
    }
}
 
Example #16
Source File: GenotypeSummaries.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public Map<String, Object> annotate(final ReferenceContext ref,
                                    final VariantContext vc,
                                    final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(vc);
    if ( ! vc.hasGenotypes() ) {
        return Collections.emptyMap();
    }

    final Map<String,Object> returnMap = new LinkedHashMap<>();
    returnMap.put(GATKVCFConstants.NOCALL_CHROM_KEY, vc.getNoCallCount());

    final DescriptiveStatistics stats = new DescriptiveStatistics();
    for( final Genotype g : vc.getGenotypes() ) {
        if( g.hasGQ() ) {
            stats.addValue(g.getGQ());
        }
    }
    if( stats.getN() > 0L ) {
        returnMap.put(GATKVCFConstants.GQ_MEAN_KEY, String.format("%.2f", stats.getMean()));
        if( stats.getN() > 1L ) {
            returnMap.put(GATKVCFConstants.GQ_STDEV_KEY, String.format("%.2f", stats.getStandardDeviation()));
        }
    }

    return returnMap;
}
 
Example #17
Source File: XHMMSegmentGenotyperIntegrationTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private void assertVariantSegmentsAreCovered(final List<VariantContext> variants,
                                             final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> variantSegments) {
    for (final HiddenStateSegmentRecord<CopyNumberTriState, Target> variantSegment : variantSegments) {
        final Optional<VariantContext> match = variants.stream()
                .filter(vc -> new SimpleInterval(vc).equals(variantSegment.getSegment().getInterval()))
                .findFirst();
        Assert.assertTrue(match.isPresent());
        final VariantContext matchedVariant = match.get();
        final Genotype genotype = matchedVariant.getGenotype(variantSegment.getSampleName());
        final String discovery = genotype.getAnyAttribute(XHMMSegmentGenotyper.DISCOVERY_KEY).toString();
        Assert.assertTrue(discovery.equals(XHMMSegmentGenotyper.DISCOVERY_TRUE));
        final CopyNumberTriState call = variantSegment.getSegment().getCall();

        final List<Allele> gt = genotype.getAlleles();
        Assert.assertEquals(gt.size(), 1);
        // The call may not be the same for case where the event-quality is relatively low.
        if (variantSegment.getSegment().getEventQuality() > 10) {
            Assert.assertEquals(CopyNumberTriStateAllele.valueOf(gt.get(0)).state, call, genotype.toString());
        }
        final String[] SQ = genotype.getAnyAttribute(XHMMSegmentGenotyper.SOME_QUALITY_KEY).toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
        final double someQual = variantSegment.getSegment().getSomeQuality();
        Assert.assertEquals(Double.parseDouble(SQ[call == CopyNumberTriState.DELETION ? 0 : 1]), someQual, XHMMSegmentGenotyper.PHRED_SCORE_PRECISION, variantSegment.getSampleName() + " => " + genotype.toString());

        final String[] LQ = genotype.getAnyAttribute(XHMMSegmentGenotyper.START_QUALITY_KEY).toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
        final double startQuality = variantSegment.getSegment().getStartQuality();
        Assert.assertEquals(Double.parseDouble(LQ[call == CopyNumberTriState.DELETION ? 0 : 1]), startQuality, XHMMSegmentGenotyper.PHRED_SCORE_PRECISION, variantSegment.getSampleName() + " => " + genotype.toString());

        final String[] RQ = genotype.getAnyAttribute(XHMMSegmentGenotyper.END_QUALITY_KEY).toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
        final double endQuality = variantSegment.getSegment().getEndQuality();
        Assert.assertEquals(Double.parseDouble(RQ[call == CopyNumberTriState.DELETION ? 0 : 1]), endQuality, XHMMSegmentGenotyper.PHRED_SCORE_PRECISION, variantSegment.getSampleName() + " => " + genotype.toString());

        // Check the PL.
        final int[] PL = genotype.getPL();
        final int observedGQFromPL = Math.min(XHMMSegmentGenotyper.MAX_GQ, PL[CopyNumberTriStateAllele.REF.index()] - PL[CopyNumberTriStateAllele.valueOf(call).index()]);
        final double expectedCallPL = GATKProtectedMathUtils.roundPhred(QualityUtils.phredScaleErrorRate(QualityUtils.qualToProb(variantSegment.getSegment().getExactQuality())), HMMPostProcessor.PHRED_SCORE_PRECISION);
        final double expectedRefPL = GATKProtectedMathUtils.roundPhred(QualityUtils.phredScaleCorrectRate(QualityUtils.qualToProb(variantSegment.getSegment().getEventQuality())), HMMPostProcessor.PHRED_SCORE_PRECISION);
        final int expectedGQFromPL = Math.min(XHMMSegmentGenotyper.MAX_GQ, (int) Math.round(expectedRefPL - expectedCallPL));
        Assert.assertTrue(Math.abs(observedGQFromPL - expectedGQFromPL) <= 1, genotype.toString() + " " + variantSegment.getSegment().getEventQuality());
    }
}
 
Example #18
Source File: GVCFBlock.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Convert a HomRefBlock into a VariantContext
 *
 * @param sampleName sample name to give this variant context
 * @return a VariantContext representing the gVCF encoding for this block.
 * It will return {@code null} if input {@code block} is {@code null}, indicating that there
 * is no variant-context to be output into the VCF.
 */
public VariantContext toVariantContext(String sampleName, final boolean floorBlocks) {
    final VariantContextBuilder vcb = new VariantContextBuilder(getStartingVC());
    vcb.attributes(new LinkedHashMap<>(2)); // clear the attributes
    vcb.stop(getEnd());
    vcb.attribute(VCFConstants.END_KEY, getEnd());
    final Genotype genotype = createHomRefGenotype(sampleName, floorBlocks);

    return vcb.genotypes(genotype).make();
}
 
Example #19
Source File: GenotypeConcordanceTest.java    From picard with MIT License 5 votes vote down vote up
@Test(dataProvider = "genotypeConcordanceDetermineStateDataProvider")
public void testGenotypeConcordanceDetermineState(final Allele truthAllele1, final Allele truthAllele2, final TruthState expectedTruthState,
                                                  final Allele callAllele1, final Allele callAllele2, final CallState expectedCallState) throws Exception {
    final List<Allele> truthAlleles = makeUniqueListOfAlleles(truthAllele1, truthAllele2);
    final Genotype truthGt = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(truthAllele1, truthAllele2));

    final VariantContext truthVariantContext = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, truthAlleles).genotypes(truthGt).make();

    final List<Allele> callAlleles = makeUniqueListOfAlleles(callAllele1, callAllele2);
    final Genotype callGt = GenotypeBuilder.create(CALL_SAMPLE_NAME, Arrays.asList(callAllele1, callAllele2));
    final VariantContext callVariantContext = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, callAlleles).genotypes(callGt).make();

    testGenotypeConcordanceDetermineState(truthVariantContext, expectedTruthState, callVariantContext, expectedCallState, 0, 0);
}
 
Example #20
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
 *
 * @param vc       the VariantContext record to subset
 * @param preserveAlleles should we trim constant sequence from the beginning and/or end of all alleles, or preserve it?
 * @param removeUnusedAlternates removes alternate alleles with AC=0
 * @return the subsetted VariantContext
 */
private VariantContext subsetRecord(final VariantContext vc, final boolean preserveAlleles, final boolean removeUnusedAlternates) {
    //subContextFromSamples() always decodes the vc, which is a fairly expensive operation.  Avoid if possible
    if (noSamplesSpecified && !removeUnusedAlternates) {
        return vc;
    }

    // strip out the alternate alleles that aren't being used
    final VariantContext sub = vc.subContextFromSamples(samples, removeUnusedAlternates);

    // If no subsetting happened, exit now
    if (sub.getNSamples() == vc.getNSamples() && sub.getNAlleles() == vc.getNAlleles()) {
        return vc;
    }

    // fix the PL and AD values if sub has fewer alleles than original vc and remove a fraction of the genotypes if needed
    final GenotypesContext oldGs = sub.getGenotypes();
    GenotypesContext newGC = sub.getNAlleles() == vc.getNAlleles() ? oldGs :
            AlleleSubsettingUtils.subsetAlleles(oldGs, 0, vc.getAlleles(), sub.getAlleles(), GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));

    if (fractionGenotypes > 0) {
        final List<Genotype> genotypes = newGC.stream().map(genotype -> randomGenotypes.nextDouble() > fractionGenotypes ? genotype :
                new GenotypeBuilder(genotype).alleles(getNoCallAlleles(genotype.getPloidy())).noGQ().make()).collect(Collectors.toList());
        newGC = GenotypesContext.create(new ArrayList<>(genotypes));
    }

    // since the VC has been subset (either by sample or allele), we need to strip out the MLE tags
    final VariantContextBuilder builder = new VariantContextBuilder(sub);
    builder.rmAttributes(Arrays.asList(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
    builder.genotypes(newGC);
    addAnnotations(builder, vc, sub.getSampleNames());
    final VariantContext subset = builder.make();

    return preserveAlleles? subset : GATKVariantContextUtils.trimAlleles(subset,true,true);
}
 
Example #21
Source File: MendelianViolationDetector.java    From picard with MIT License 5 votes vote down vote up
/** Tests to ensure that a locus is bi-allelic within the given set of samples. */
private boolean isVariant(final Genotype... gts) {
    for (final Genotype gt : gts) {
        if (gt.isCalled() && !gt.isHomRef()) return true;
    }

    return false;
}
 
Example #22
Source File: ConvertGSVariantsToSegments.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public void apply(final VariantContext variant, final ReadsContext readsContext,
                  final ReferenceContext referenceContext,
                  final FeatureContext featureContext) {
    final SimpleInterval interval = new SimpleInterval(variant);
    final int targetCount = targets.indexRange(interval).size();
    final int[] callCounts = new int[CopyNumberTriState.values().length];
    for (final Genotype genotype : variant.getGenotypes().iterateInSampleNameOrder()) {
        final String sample = genotype.getSampleName();
        final double mean = doubleFrom(genotype.getExtendedAttribute(GS_COPY_NUMBER_FRACTION));
        final int copyNumber = intFrom(genotype.getExtendedAttribute(GS_COPY_NUMBER_FORMAT));
        final CopyNumberTriState call = copyNumber == neutralCopyNumber ? CopyNumberTriState.NEUTRAL : (copyNumber < neutralCopyNumber) ? CopyNumberTriState.DELETION : CopyNumberTriState.DUPLICATION;
        callCounts[call.ordinal()]++;
        final double[] probs = doubleArrayFrom(genotype.getExtendedAttribute(GS_COPY_NUMBER_POSTERIOR));
        final double log10PostQualCall = calculateLog10CallQuality(probs, call);
        final double log10PostQualNonRef = calculateLog10CallQualityNonRef(probs);
        final double phredProbCall = -10.0 * log10PostQualCall;
        final double phredProbNonRef = -10.0 * log10PostQualNonRef;
        final HiddenStateSegment<CopyNumberTriState, Target> segment = new HiddenStateSegment<>(
                interval,
                targetCount,
                mean,
                0.0, // GS VCF does not contain any stddev or var estimate for coverage fraction.
                call,
                phredProbCall, // GS does not provide an EQ, we approximate it to be the 1 - sum of all call compatible CN corresponding posterior probs
                Double.NaN, // GS does not provide a SQ, we leave is a NaN.
                Double.NaN, // GS does not provide a START Q.
                Double.NaN, // GS does not provide a END Q.
                phredProbNonRef
        );

        final HiddenStateSegmentRecord<CopyNumberTriState, Target> record = new HiddenStateSegmentRecord<>(sample, segment);
        try {
            outputWriter.writeRecord(record);
        } catch (final IOException ex) {
            throw new UserException.CouldNotCreateOutputFile(outputFile, ex);
        }
    }
}
 
Example #23
Source File: VcfToAdpc.java    From picard with MIT License 5 votes vote down vote up
private int getUnsignedShortAttributeAsInt(final Genotype genotype, final String key) {
    final int attributeAsInt = Integer.parseInt(getRequiredAttribute(genotype, key).toString());
    if (attributeAsInt < 0) {
        throw new PicardException("Value for key " + key + " (" + attributeAsInt + ") is <= 0!  Invalid value for unsigned int");
    }
    if (attributeAsInt > picard.arrays.illumina.InfiniumDataFile.MAX_UNSIGNED_SHORT) {
        log.warn("Value for key " + key + " (" + attributeAsInt + ") is > " + picard.arrays.illumina.InfiniumDataFile.MAX_UNSIGNED_SHORT + " (truncating it)");
        return picard.arrays.illumina.InfiniumDataFile.MAX_UNSIGNED_SHORT;
    }
    return attributeAsInt;
}
 
Example #24
Source File: CombineGVCFsIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testDropSomaticFilteringAnnotations() throws IOException {
    final File output = createTempFile("combinegvcfs", ".vcf");
    final ArgumentsBuilder args = new ArgumentsBuilder();
    args.addReference(new File(b37Reference));
    args.addOutput(output);
    args.addVCF(getTestFile("NA12878.MT.filtered.g.vcf"));
    args.addVCF(getTestFile("NA19240.MT.filtered.g.vcf"));
    args.addVCF(getTestFile("NA12891.MT.filtered.g.vcf"));
    args.add(CombineGVCFs.SOMATIC_INPUT_LONG_NAME, true);
    args.add(CombineGVCFs.DROP_SOMATIC_FILTERING_ANNOTATIONS_LONG_NAME, true);
    runCommandLine(args);

    final List<VariantContext> actualVCs = getVariantContexts(output);
    for (final VariantContext vc : actualVCs) {
        Assert.assertFalse(vc.hasAttribute(GATKVCFConstants.POPULATION_AF_KEY));
        Assert.assertFalse(vc.hasAttribute(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY));
        Assert.assertFalse(vc.hasAttribute(GATKVCFConstants.MEDIAN_MAPPING_QUALITY_KEY));
        Assert.assertFalse(vc.hasAttribute(GATKVCFConstants.MEDIAN_BASE_QUALITY_KEY));
        Assert.assertFalse(vc.hasAttribute(GATKVCFConstants.MEDIAN_READ_POSITON_KEY));
        Assert.assertFalse(vc.hasAttribute(GATKVCFConstants.MEDIAN_FRAGMENT_LENGTH_KEY));
        for (final Genotype g : vc.getGenotypes()) {
            Assert.assertFalse(g.hasExtendedAttribute(GATKVCFConstants.MEDIAN_MAPPING_QUALITY_KEY));
            Assert.assertFalse(g.hasExtendedAttribute(GATKVCFConstants.MEDIAN_BASE_QUALITY_KEY));
            Assert.assertFalse(g.hasExtendedAttribute(GATKVCFConstants.MEDIAN_READ_POSITON_KEY));
            Assert.assertFalse(g.hasExtendedAttribute(GATKVCFConstants.MEDIAN_FRAGMENT_LENGTH_KEY));
            Assert.assertTrue(g.hasExtendedAttribute(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY));
        }

    }
}
 
Example #25
Source File: GATKProtectedVariantContextUtils.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Returns an attribute as a string.
 * @param genotype the source genotype.
 * @param key the attribute key.
 * @param defaultValue default value to return in case the attribute does not have a value defined.
 * @return the string version of the attribute value.
 * @throws IllegalArgumentException if {@code genotype} or {@code key} is {@code null}.
 */
public static String getAttributeAsString(final Genotype genotype, final String key, final String defaultValue) {
    Utils.nonNull(genotype);
    Utils.nonNull(key);
    final Object value = genotype.getExtendedAttribute(key);
    if (value == null) {
        return defaultValue;
    } else {
        return String.valueOf(value);
    }
}
 
Example #26
Source File: VcfToAdpc.java    From picard with MIT License 5 votes vote down vote up
private Object getRequiredAttribute(Genotype genotype, final String key) {
    final Object value = genotype.getAnyAttribute(key);
    if (value == null) {
        throw new PicardException("Unable to find attribute " + key + " in VCF Genotype field.  Is this an Arrays VCF file?");
    }
    return value;
}
 
Example #27
Source File: OrientationBiasReadCounts.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public void annotate(final ReferenceContext refContext,
                              final VariantContext vc,
                              final Genotype g,
                              final GenotypeBuilder gb,
                              final AlleleLikelihoods<GATKRead, Allele> likelihoods){
    Utils.nonNull(gb, "gb is null");
    Utils.nonNull(vc, "vc is null");

    if (g == null || likelihoods == null) {
        return;
    }

    final Map<Allele, MutableInt> f1r2Counts = likelihoods.alleles().stream()
            .collect(Collectors.toMap(a -> a, a -> new MutableInt(0)));

    final Map<Allele, MutableInt> f2r1Counts = likelihoods.alleles().stream()
            .collect(Collectors.toMap(a -> a, a -> new MutableInt(0)));

    Utils.stream(likelihoods.bestAllelesBreakingTies(g.getSampleName()))
            .filter(ba -> ba.isInformative() && isUsableRead(ba.evidence) && BaseQualityRankSumTest.getReadBaseQuality(ba.evidence, vc).orElse(0) >= MINIMUM_BASE_QUALITY)
            .forEach(ba -> (ReadUtils.isF2R1(ba.evidence) ? f2r1Counts : f1r2Counts).get(ba.allele).increment());

    final int[] f1r2 = vc.getAlleles().stream().mapToInt(a -> f1r2Counts.get(a).intValue()).toArray();

    final int[] f2r1 = vc.getAlleles().stream().mapToInt(a -> f2r1Counts.get(a).intValue()).toArray();

    gb.attribute(GATKVCFConstants.F1R2_KEY, f1r2);
    gb.attribute(GATKVCFConstants.F2R1_KEY, f2r1);
}
 
Example #28
Source File: GVCFBlockCombiner.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Helper function to create a new HomRefBlock from a variant context and current genotype
 *
 * @param vc the VariantContext at the site where want to start the band
 * @param g  the genotype of the sample from vc that should be used to initialize the block
 * @return a newly allocated and initialized block containing g already
 */
GVCFBlock createNewBlock(final VariantContext vc, final Genotype g) {
    // figure out the GQ limits to use based on the GQ of g
    final int gq = Math.min(g.getGQ(), MAX_GENOTYPE_QUAL);
    final Range<Integer> partition = gqPartitions.get(gq);

    if( partition == null) {
        throw new GATKException("GQ " + g + " from " + vc + " didn't fit into any partition");
    }

    // create the block, add g to it, and return it for use
    final HomRefBlock block = new HomRefBlock(vc, partition.lowerEndpoint(), partition.upperEndpoint(), defaultPloidy);
    block.add(vc.getStart(), vc.getAttributeAsInt(VCFConstants.END_KEY, vc.getStart()), g);
    return block;
}
 
Example #29
Source File: HaplotypeMap.java    From picard with MIT License 5 votes vote down vote up
static private String anchorFromVc(final VariantContext vc) {
    final Genotype genotype = vc.getGenotype(0);

    if (genotype == null || !genotype.hasExtendedAttribute(VCFConstants.PHASE_SET_KEY)) {
        return SYNTHETIC_PHASESET_PREFIX + "_" + vc.getContig() + "_" + vc.getStart();
    } else {
        return PHASESET_PREFIX + "_" + vc.getContig() + "_" + genotype.getExtendedAttribute(VCFConstants.PHASE_SET_KEY);
    }
}
 
Example #30
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks if the two variants have the same genotypes for the selected samples
 *
 * @param vc the variant rod VariantContext.
 * @param compVCs the comparison VariantContext
 * @return true if VariantContexts are concordant, false otherwise
 */
private boolean isConcordant (final VariantContext vc, final Collection<VariantContext> compVCs) {
    if (vc == null || compVCs == null || compVCs.isEmpty()) {
        return false;
    }

    // if we're not looking for specific samples then the fact that we have both VCs is enough to call it concordant.
    if (noSamplesSpecified) {
        return true;
    }

    // make a list of all samples contained in this variant VC that are being tracked by the user command line arguments.
    final Set<String> variantSamples = vc.getSampleNames();
    variantSamples.retainAll(samples);

    // check if we can find all samples from the variant rod in the comp rod.
    for (final String sample : variantSamples) {
        boolean foundSample = false;
        for (final VariantContext compVC : compVCs) {
            final Genotype varG = vc.getGenotype(sample);
            final Genotype compG = compVC.getGenotype(sample);
            if (haveSameGenotypes(varG, compG)) {
                foundSample = true;
                break;
            }
        }
        // if at least one sample doesn't have the same genotype, we don't have concordance
        if (!foundSample) {
            return false;
        }
    }
    return true;
}