Java Code Examples for htsjdk.variant.variantcontext.Genotype#hasExtendedAttribute()

The following examples show how to use htsjdk.variant.variantcontext.Genotype#hasExtendedAttribute() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PonBuilder.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
public void add(@NotNull final VariantContext context) {
    final VariantHotspot hotspot = hotspot(context);
    final Counter counter = map.computeIfAbsent(hotspot, Counter::new);
    final Genotype genotype = context.getGenotype(0);
    if (!hotspot.ref().contains("N") && genotype.hasExtendedAttribute(SageVCF.RAW_ALLELIC_DEPTH)) {
        String rawDepth = (String) genotype.getExtendedAttribute(SageVCF.RAW_ALLELIC_DEPTH);
        int allelicDepth = Integer.parseInt(rawDepth.split(",")[1]);
        if (allelicDepth >= MIN_INPUT_ALLELIC_DEPTH) {
            counter.increment(allelicDepth);
        }
    }
}
 
Example 2
Source File: HaplotypeMap.java    From picard with MIT License 5 votes vote down vote up
static private String anchorFromVc(final VariantContext vc) {
    final Genotype genotype = vc.getGenotype(0);

    if (genotype == null || !genotype.hasExtendedAttribute(VCFConstants.PHASE_SET_KEY)) {
        return SYNTHETIC_PHASESET_PREFIX + "_" + vc.getContig() + "_" + vc.getStart();
    } else {
        return PHASESET_PREFIX + "_" + vc.getContig() + "_" + genotype.getExtendedAttribute(VCFConstants.PHASE_SET_KEY);
    }
}
 
Example 3
Source File: RMSMappingQuality.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 *
 * @return the number of reads at the given site, calculated as InfoField {@link VCFConstants#DEPTH_KEY} minus the
 * format field {@link GATKVCFConstants#MIN_DP_FORMAT_KEY} or DP of each of the HomRef genotypes at that site
 * @throws UserException.BadInput if the {@link VCFConstants#DEPTH_KEY} is missing or if the calculated depth is <= 0
 */
@VisibleForTesting
private static int getNumOfReads(final VariantContext vc) {
    if(vc.hasAttribute(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED)) {
        int mqDP = vc.getAttributeAsInt(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED, 0);
        if (mqDP > 0) {
            return mqDP;
        }
    }

    //don't use the full depth because we don't calculate MQ for reference blocks
    //don't count spanning deletion calls towards number of reads
    int numOfReads = vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, -1);
    if(vc.hasGenotypes()) {
        for(final Genotype gt : vc.getGenotypes()) {
           if(hasReferenceDepth(gt)) {
                //site-level DP contribution will come from MIN_DP for gVCF-called reference variants or DP for BP resolution
                if (gt.hasExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY)) {
                    numOfReads -= Integer.parseInt(gt.getExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY).toString());
                } else if (gt.hasDP()) {
                    numOfReads -= gt.getDP();
                }
            }
            else if(hasSpanningDeletionAllele(gt)) {
                //site-level DP contribution will come from MIN_DP for gVCF-called reference variants or DP for BP resolution
                if (gt.hasExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY)) {
                    numOfReads -= Integer.parseInt(gt.getExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY).toString());
                } else if (gt.hasDP()) {
                    numOfReads -= gt.getDP();
                }
            }
        }
    }
    if (numOfReads <= 0){
        numOfReads = -1;  //return -1 to result in a NaN
    }
    return numOfReads;
}
 
Example 4
Source File: ReadOrientationFilter.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private double artifactProbability(final ReferenceContext referenceContext, final int refPosition, final Genotype g, final int indexOfMaxTumorLod, final Nucleotide altBase) {
    final String refContext = referenceContext.getKmerAround(refPosition, F1R2FilterConstants.REF_CONTEXT_PADDING);
    if (refContext ==  null || refContext.contains("N")){
        return 0;
    }

    Utils.validate(refContext.length() == 2 * F1R2FilterConstants.REF_CONTEXT_PADDING + 1,
            String.format("kmer must have length %d but got %d", 2 * F1R2FilterConstants.REF_CONTEXT_PADDING + 1, refContext.length()));

    final Nucleotide refAllele = F1R2FilterUtils.getMiddleBase(refContext);

    if (!(g.hasExtendedAttribute(GATKVCFConstants.F1R2_KEY) && g.hasExtendedAttribute(GATKVCFConstants.F2R1_KEY))) {
        return 0;
    }

    final int[] f1r2 = getF1R2(g);
    final int[] f2r1 = getF2R1(g);
    final int refCount =  f1r2[0] + f2r1[0];
    final int altF1R2 = f1r2[indexOfMaxTumorLod + 1];
    final int altF2R1 = f2r1[indexOfMaxTumorLod + 1];
    final int altCount = altF1R2 + altF2R1;
    final Optional<ArtifactPrior> artifactPrior = artifactPriorCollections.get(g.getSampleName()).get(refContext);

    if (! artifactPrior.isPresent()){
        return 0;
    }

    final int depth = refCount + altCount;

    final double[] posterior = LearnReadOrientationModelEngine.computeResponsibilities(refAllele, altBase, altCount, altF1R2, depth, artifactPrior.get().getPi(), true);

    final double posteriorOfF1R2 = posterior[ArtifactState.getF1R2StateForAlt(altBase).ordinal()];
    final double posteriorOfF2R1 = posterior[ArtifactState.getF2R1StateForAlt(altBase).ordinal()];

    return Math.max(posteriorOfF1R2, posteriorOfF2R1);
}
 
Example 5
Source File: EvaluateCopyNumberTriStateCallsIntegrationTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private void checkOutputCallsWithoutOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
    final List<VariantContext> truthVariants = readVCFFile(truthFile);
    final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
    final List<VariantContext> callsVariants = readVCFFile(callsFile);
    final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
    final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);

    for (final VariantContext call : callsVariants) {
        final List<Target> overlappingTargets = targets.targets(call);
        final List<VariantContext> overlappingOutput = outputVariants.stream()
                .filter(vc -> new SimpleInterval(vc).overlaps(call))
                .collect(Collectors.toList());
        final List<VariantContext> overlappingTruth = truthVariants.stream()
                .filter(vc -> new SimpleInterval(vc).overlaps(call))
                .collect(Collectors.toList());

        if (!overlappingTruth.isEmpty()) {
            continue;
        }

        @SuppressWarnings("all")
        final Optional<VariantContext> matchingOutputOptional = overlappingOutput.stream()
                .filter(vc -> new SimpleInterval(call).equals(new SimpleInterval(vc)))
                .findAny();
        final VariantContext matchingOutput = matchingOutputOptional.get();
        final int sampleCallsCount[] = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
        for (final String sample : outputSamples) {
            final Genotype outputGenotype = matchingOutput.getGenotype(sample);
            final Genotype callGenotype = call.getGenotype(sample);
            final Allele expectedCall = callGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(callGenotype.getAllele(0)) : null;
            final Allele actualCall = outputGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0)) : null;
            Assert.assertEquals(expectedCall, actualCall);
            final boolean expectedDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
            final boolean actualDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
            Assert.assertEquals(actualDiscovered, expectedDiscovered);
            final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
            if (expectedCall.isCalled() && actualDiscovered) {
                expectedCounts[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
            }
            if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY)) {
                Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0), expectedCounts);
            }
            if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY)) {
                Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1), expectedCall.isCalled() && actualDiscovered ? 1 : 0);
            }
            final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
            Assert.assertEquals(evalClass, expectedCall.isCalled() && actualDiscovered && expectedCall.isNonReference() ? EvaluationClass.UNKNOWN_POSITIVE.acronym : null);
            if (expectedCall.isCalled()) {
                sampleCallsCount[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
            }
            Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, 0.0), callGQ(callGenotype), 0.01);
        }
        final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
        final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
        Assert.assertEquals(observedAN, expectedAN);
        final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0)
                .toArray(), 1, sampleCallsCount.length);
        final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
        Assert.assertNotNull(observedAF);
        assertEquals(observedAF, expectedAF, 0.01);
        Assert.assertEquals(matchingOutput.getAttributeAsInt(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, -1), 0);
    }
}
 
Example 6
Source File: HaplotypeMap.java    From picard with MIT License 4 votes vote down vote up
/**
 * Constructs a HaplotypeMap from the provided file.
 */

private void fromVcf(final File file) {
    try ( final VCFFileReader reader = new VCFFileReader(file, false)) {

        final SAMSequenceDictionary dict = reader.getFileHeader().getSequenceDictionary();

        if (dict == null || dict.getSequences().isEmpty()) {
            throw new IllegalStateException("Haplotype map VCF file must contain header: " + file.getAbsolutePath());
        }

        initialize(new SAMFileHeader(dict));

        final Map<String, HaplotypeBlock> anchorToHaplotype = new HashMap<>();

        for (final VariantContext vc : reader) {

            if (vc.getNSamples() > 1) {
                throw new IllegalStateException("Haplotype map VCF file must contain at most one sample: " + file.getAbsolutePath());
            }

            final Genotype gc = vc.getGenotype(0); // may be null
            final boolean hasGc = gc != null;

            if (vc.getAlternateAlleles().size() != 1) {
                throw new IllegalStateException("Haplotype map VCF file must contain exactly one alternate allele per site: " + vc.toString());
            }

            if (!vc.isSNP()) {
                throw new IllegalStateException("Haplotype map VCF file must contain only SNPs: " + vc.toString());
            }

            if (!vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)) {
                throw new IllegalStateException("Haplotype map VCF Variants must have an '"+ VCFConstants.ALLELE_FREQUENCY_KEY + "' INFO field: " + vc.toString());
            }


            if (hasGc && gc.isPhased() && !gc.hasExtendedAttribute(VCFConstants.PHASE_SET_KEY)) {
                throw new IllegalStateException("Haplotype map VCF Variants' genotypes that are phased must have a PhaseSet (" + VCFConstants.PHASE_SET_KEY+")" + vc.toString());
            }

            if (hasGc && gc.isPhased() && !gc.isHet()) {
                throw new IllegalStateException("Haplotype map VCF Variants' genotypes that are phased must be HET" + vc.toString());
            }

            // Then parse them out
            final String chrom = vc.getContig();
            final int pos = vc.getStart();
            final String name = vc.getID();

            final byte ref = vc.getReference().getBases()[0];
            final byte var = vc.getAlternateAllele(0).getBases()[0];

            final double temp_maf = vc.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 0D);
            final boolean swapped = hasGc && !gc.getAllele(0).equals(vc.getReference());

            final byte major, minor;
            final double maf;

            if (swapped) {
                major = var;
                minor = ref;
                maf = 1 - temp_maf;
            } else {
                major = ref;
                minor = var;
                maf = temp_maf;
            }

            final String anchor = anchorFromVc(vc);

            // If it's the anchor snp, start the haplotype
            if (!anchorToHaplotype.containsKey(anchor)) {
                final HaplotypeBlock newBlock = new HaplotypeBlock(maf);
                anchorToHaplotype.put(anchor, newBlock);
            }
            final HaplotypeBlock block = anchorToHaplotype.get(anchor);
            block.addSnp(new Snp(name, chrom, pos, major, minor, maf, null));
        }

        // And add them all now that they are all ready.
        fromHaplotypes(anchorToHaplotype.values());
    }
}
 
Example 7
Source File: HomRefBlock.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Add a homRef block to the current block
 *
 * @param pos current genomic position
 * @param newEnd new calculated block end position
 * @param genotype A non-null Genotype with GQ and DP attributes
 */
@Override
public void add(final int pos, final int newEnd, final Genotype genotype) {
    Utils.nonNull(genotype, "genotype cannot be null");
    if ( ! genotype.hasPL() ) { throw new IllegalArgumentException("genotype must have PL field");}
    if ( pos != end + 1 ) { throw new IllegalArgumentException("adding genotype at pos " + pos + " isn't contiguous with previous end " + end); }
    if ( genotype.getPloidy() != ploidy) { throw new IllegalArgumentException("cannot add a genotype with a different ploidy: " + genotype.getPloidy() + " != " + ploidy); }
    // Make sure the GQ is within the bounds of this band. Treat GQs > 99 as 99.
    if ( !withinBounds(Math.min(genotype.getGQ(), VCFConstants.MAX_GENOTYPE_QUAL))) {
        throw new IllegalArgumentException("cannot add a genotype with GQ=" + genotype.getGQ() + " because it's not within bounds ["
                + this.getGQLowerBound() + ',' + this.getGQUpperBound() + ')');
    }

    if( minPLs == null ) {
        minPLs = genotype.getPL();
    }
    else { // otherwise take the min with the provided genotype's PLs
        final int[] pls = genotype.getPL();
        if (pls.length != minPLs.length) {
            throw new GATKException("trying to merge different PL array sizes: " + pls.length + " != " + minPLs.length);
        }
        for (int i = 0; i < pls.length; i++) {
            minPLs[i] = Math.min(minPLs[i], pls[i]);
        }
    }

    if( genotype.hasExtendedAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY)) {
        if (minPPs == null ) {
            minPPs = PosteriorProbabilitiesUtils.parsePosteriorsIntoPhredSpace(genotype);
        }
        else { // otherwise take the min with the provided genotype's PLs
            final int[] pps = PosteriorProbabilitiesUtils.parsePosteriorsIntoPhredSpace(genotype);
            if (pps.length != minPPs.length) {
                throw new GATKException("trying to merge different PP array sizes: " + pps.length + " != " + minPPs.length);
            }
            for (int i = 0; i < pps.length; i++) {
                minPPs[i] = Math.min(minPPs[i], pps[i]);
            }
        }
    }

    end = newEnd;
    DPs.add(Math.max(genotype.getDP(), 0)); // DP must be >= 0
}
 
Example 8
Source File: RMSMappingQuality.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 *
 * @return the number of reads at the given site, trying first {@Link GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY},
 * falling back to calculating the value as InfoField {@link VCFConstants#DEPTH_KEY} minus the
 * format field {@link GATKVCFConstants#MIN_DP_FORMAT_KEY} or DP of each of the HomRef genotypes at that site.
 * If neither of those is possible, will fall back to calculating the reads from the likelihoods data if provided.
 * @throws UserException.BadInput if the {@link VCFConstants#DEPTH_KEY} is missing or if the calculated depth is <= 0
 */
@VisibleForTesting
static long getNumOfReads(final VariantContext vc,
                         final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    if(vc.hasAttribute(GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY)) {
        List<Long> mqTuple = VariantContextGetters.getAttributeAsLongList(vc, GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY,0L);
        if (mqTuple.get(TOTAL_DEPTH_INDEX) > 0) {
            return mqTuple.get(TOTAL_DEPTH_INDEX);
        }
    }

    long numOfReads = 0;
    if (vc.hasAttribute(VCFConstants.DEPTH_KEY)) {
        numOfReads = VariantContextGetters.getAttributeAsLong(vc, VCFConstants.DEPTH_KEY, -1L);
        if(vc.hasGenotypes()) {
            for(final Genotype gt : vc.getGenotypes()) {
                if(gt.isHomRef()) {
                    //site-level DP contribution will come from MIN_DP for gVCF-called reference variants or DP for BP resolution
                    if (gt.hasExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY)) {
                        numOfReads -= Long.parseLong(gt.getExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY).toString());
                    } else if (gt.hasDP()) {
                        numOfReads -= gt.getDP();
                    }
                }
            }
        }

    // If there is no depth key, try to compute from the likelihoods
    } else if (likelihoods != null && likelihoods.numberOfAlleles() != 0) {
        for (int i = 0; i < likelihoods.numberOfSamples(); i++) {
            for (GATKRead read : likelihoods.sampleEvidence(i)) {
                if (read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) {
                    numOfReads++;
                }
            }
        }
    }
    if (numOfReads <= 0) {
        numOfReads = -1;  //return -1 to result in a NaN
    }
    return numOfReads;
}