Java Code Examples for htsjdk.variant.variantcontext.VariantContext#getAlternateAllele()

The following examples show how to use htsjdk.variant.variantcontext.VariantContext#getAlternateAllele(). Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
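Before the project examples, here is a minimal sketch of the basic call pattern (the method and variable names are illustrative, not taken from any of the projects below): getAlternateAllele(i) returns the i-th ALT allele of the site, 0-based, so it is worth checking getAlternateAlleles() first on sites that may carry no ALT alleles.

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;

// Minimal sketch (illustrative names): print every ALT allele of a VariantContext.
static void printAltAlleles(final VariantContext vc) {
    final int numAlts = vc.getAlternateAlleles().size();
    for (int i = 0; i < numAlts; i++) {
        final Allele alt = vc.getAlternateAllele(i);
        System.out.println(vc.getContig() + ":" + vc.getStart() + " ALT[" + i + "] = " + alt.getBaseString());
    }
}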
Example 1
Source File: SamRecordScoring.java    From hmftools with GNU General Public License v3.0
@NotNull
private static ReadType getReadType(@NotNull final SAMRecord record, @NotNull final VariantContext variant) {
    final Allele alt = variant.getAlternateAllele(0);
    final Allele ref = variant.getReference();
    final int recordIdxOfVariantStart = record.getReadPositionAtReferencePosition(variant.getStart());
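    // Note: getReadPositionAtReferencePosition() is 1-based and returns 0 if the reference position falls in a deletion or outside the read, hence the -1 when indexing the read string below.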
    if (recordIdxOfVariantStart == 0) {
        // Variant position was deleted
        return ReadType.MISSING;
    }
    if (variant.isSNP()) {
        if (record.getReadString().charAt(recordIdxOfVariantStart - 1) == alt.getBaseString().charAt(0)) {
            return ReadType.ALT;
        } else {
            return ReadType.REF;
        }
    }
    if (variant.isSimpleInsertion()) {
        return SAMRecords.containsInsert(record, variant.getStart(), alt.getBaseString()) ? ReadType.ALT : ReadType.REF;
    }
    if (variant.isSimpleDeletion()) {
        return SAMRecords.containsDelete(record, variant.getStart(), ref.getBaseString()) ? ReadType.ALT : ReadType.REF;
    }
    return ReadType.OTHER;
}
 
Example 2
Source File: OriginalAlignment.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
public Map<String, Object> annotate(ReferenceContext ref, VariantContext vc, AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(vc);
    Utils.nonNull(likelihoods);

    final double[] lods = Mutect2FilteringEngine.getTumorLogOdds(vc);
    if (lods==null) {
        warning.warn(String.format("One or more variant contexts is missing the 'TLOD' annotation, %s will not be computed for these VariantContexts", GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY));
        return Collections.emptyMap();
    }
    final int indexOfMaxLod = MathUtils.maxElementIndex(lods);
    final Allele altAllele = vc.getAlternateAllele(indexOfMaxLod);
    final Collection<AlleleLikelihoods<GATKRead, Allele>.BestAllele> bestAlleles = likelihoods.bestAllelesBreakingTies();
    final String currentContig = ref.getInterval().getContig();

    final long nonChrMAlt = bestAlleles.stream()
            .filter(ba -> ba.evidence.hasAttribute(AddOriginalAlignmentTags.OA_TAG_NAME) && ba.isInformative() && ba.allele.equals(altAllele) &&
                    !AddOriginalAlignmentTags.getOAContig(ba.evidence).equals(currentContig))
            .count();
    return ImmutableMap.of(GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY, nonChrMAlt);
}
 
Example 3
Source File: VariantEval.java    From gatk with BSD 3-Clause "New" or "Revised" License
private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
    if ( comp.getType() == VariantContext.Type.NO_VARIATION || eval.getType() == VariantContext.Type.NO_VARIATION )
        // if either of these are NO_VARIATION they are LENIENT matches
        return EvalCompMatchType.LENIENT;

    if ( comp.getType() != eval.getType() )
        return EvalCompMatchType.NO_MATCH;

    // find the comp which matches both the reference allele and alternate allele from eval
    final Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0);
    final Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0);
    if ((altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference())))
        return EvalCompMatchType.STRICT;
    else
        return requireStrictAlleleMatch ? EvalCompMatchType.NO_MATCH : EvalCompMatchType.LENIENT;
}
 
Example 4
Source File: ReadOrientationFilter.java    From gatk with BSD 3-Clause "New" or "Revised" License
@VisibleForTesting
double artifactProbability(final ReferenceContext referenceContext, final VariantContext vc, final Genotype g) {
    // As of June 2018, genotype is hom ref iff we have the normal sample, but this may change in the future
    // TODO: handle MNVs
    if (g.isHomRef() || (!vc.isSNP() && !vc.isMNP()) ){
        return 0;
    } else if (!artifactPriorCollections.containsKey(g.getSampleName())) {
        return 0;
    }

    final double[] tumorLods = VariantContextGetters.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY, () -> null, -1);
    final int indexOfMaxTumorLod = MathUtils.maxElementIndex(tumorLods);
    final Allele altAllele = vc.getAlternateAllele(indexOfMaxTumorLod);
    final byte[] altBases = altAllele.getBases();

    // for MNVs, treat each base as an independent substitution and take the maximum of all error probabilities
    return IntStream.range(0, altBases.length).mapToDouble(n -> {
        final Nucleotide altBase = Nucleotide.valueOf(new String(new byte[] {altBases[n]}));

        return artifactProbability(referenceContext, vc.getStart() + n, g, indexOfMaxTumorLod, altBase);
    }).max().orElse(0.0);
}
 
Example 5
Source File: EventMap.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Create a block substitution out of two variant contexts that start at the same position
 *
 * vc1 can be a SNP, and vc2 can then be either an insertion or a deletion.
 * If vc1 is an indel, then vc2 must be the opposite type (vc1 deletion => vc2 must be an insertion)
 *
 * @param vc1 the first variant context we want to merge
 * @param vc2 the second
 * @return a block substitution that represents the composite substitution implied by vc1 and vc2
 */
protected VariantContext makeBlock(final VariantContext vc1, final VariantContext vc2) {
    Utils.validateArg( vc1.getStart() == vc2.getStart(), () -> "vc1 and 2 must have the same start but got " + vc1 + " and " + vc2);
    Utils.validateArg( vc1.isBiallelic(), "vc1 must be biallelic");
    if ( ! vc1.isSNP() ) {
        Utils.validateArg ( (vc1.isSimpleDeletion() && vc2.isSimpleInsertion()) || (vc1.isSimpleInsertion() && vc2.isSimpleDeletion()),
                () -> "Can only merge single insertion with deletion (or vice versa) but got " + vc1 + " merging with " + vc2);
    } else {
        Utils.validateArg(!vc2.isSNP(), () -> "vc1 is " + vc1 + " but vc2 is a SNP, which implies there's been some terrible bug in the cigar " + vc2);
    }

    final Allele ref, alt;
    final VariantContextBuilder b = new VariantContextBuilder(vc1);
    if ( vc1.isSNP() ) {
        // we have to repair the first base, so SNP case is special cased
        if ( vc1.getReference().equals(vc2.getReference()) ) {
            // we've got an insertion, so we just update the alt to have the prev alt
            ref = vc1.getReference();
            alt = Allele.create(vc1.getAlternateAllele(0).getDisplayString() + vc2.getAlternateAllele(0).getDisplayString().substring(1), false);
        } else {
            // we're dealing with a deletion, so we patch the ref
            ref = vc2.getReference();
            alt = vc1.getAlternateAllele(0);
            b.stop(vc2.getEnd());
        }
    } else {
        final VariantContext insertion = vc1.isSimpleInsertion() ? vc1 : vc2;
        final VariantContext deletion  = vc1.isSimpleInsertion() ? vc2 : vc1;
        ref = deletion.getReference();
        alt = insertion.getAlternateAllele(0);
        b.stop(deletion.getEnd());
    }

    return b.alleles(Arrays.asList(ref, alt)).make();
}
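As a quick illustration of the SNP-plus-insertion branch above (the alleles are hypothetical, and plain strings stand in for htsjdk Allele objects):

// Hypothetical alleles, string-level only, to show how the block alt is composed.
String snpRef = "A",  snpAlt = "C";     // biallelic SNP at the shared start position
String insRef = "A",  insAlt = "ATT";   // simple insertion starting at the same position
// Same composition as the SNP branch above: SNP alt + insertion alt minus its shared leading base.
String blockAlt = snpAlt + insAlt.substring(1);  // "CTT"
String blockRef = snpRef;                        // block reference stays "A"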
 
Example 6
Source File: VariantsToTable.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static Object splitAltAlleles(final VariantContext vc) {
    final int numAltAlleles = vc.getAlternateAlleles().size();
    if ( numAltAlleles == 1 ) {
        return vc.getAlternateAllele(0);
    }

    return vc.getAlternateAlleles();
}
 
Example 7
Source File: FuncotatorIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testVCFColumnsArentShuffled() {
    final File outputFile = createTempFile("tmpTestFilterParsing", "vcf");

    final ArgumentsBuilder arguments = createBaselineArgumentsForFuncotator(
            VCF_FIELD_ORDER_SWAP_TEST_VCF,
            outputFile,
            b37Reference,
            VCF_FIELD_ORDER_TEST_DATA_SOURCES,
            FuncotatorTestConstants.REFERENCE_VERSION_HG19,
            FuncotatorArgumentDefinitions.OutputFormatType.VCF,
            false);

    arguments.add(FuncotatorArgumentDefinitions.FORCE_B37_TO_HG19_REFERENCE_CONTIG_CONVERSION, true);

    runCommandLine(arguments);

    final Pair<VCFHeader, List<VariantContext>> tempVcf =  VariantContextTestUtils.readEntireVCFIntoMemory(outputFile.getAbsolutePath());
    Assert.assertEquals( tempVcf.getRight().size(), 1 );

    final String[] funcotatorKeys = FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(tempVcf.getLeft().getInfoHeaderLine(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME).getDescription());

    final VariantContext variantContext = tempVcf.getRight().get(0);
    final Map<Allele, FuncotationMap> funcs = FuncotatorUtils.createAlleleToFuncotationMapFromFuncotationVcfAttribute(
            funcotatorKeys, variantContext, "Gencode_19_annotationTranscript", "FAKE_SOURCE");

    Allele allele = variantContext.getAlternateAllele(0);
    final String txId = funcs.get(allele).getTranscriptList().get(0);
    Assert.assertEquals( funcs.get(allele).get(txId).size(), 1 );

    final Funcotation funcotation = funcs.get(allele).get(txId).get(0);

    //Assert that the value of the field F# is F#|F# encoded in Funcotator's percent encoding scheme
    for(int i = 1; i <= 9; i++){
        Assert.assertEquals(funcotation.getField("dbSnp_F"+i), "F"+i+"_%7C_"+"F"+i);
    }
}
 
Example 8
Source File: ApplyVQSR.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Calculate the allele-specific filter status of vc
 * @param vc the variant context to evaluate
 * @param recals the recalibration records for this site
 * @param builder the VariantContextBuilder for vc; modified by adding attributes
 * @return a String with the filter status for this site
 */
private String doAlleleSpecificFiltering(final VariantContext vc, final List<VariantContext> recals, final VariantContextBuilder builder) {
    double bestLod = VariantRecalibratorEngine.MIN_ACCEPTABLE_LOD_SCORE;
    final List<String> culpritStrings = new ArrayList<>();
    final List<String> lodStrings = new ArrayList<>();
    final List<String> AS_filterStrings = new ArrayList<>();

    String[] prevCulpritList = null;
    String[] prevLodList = null;
    String[] prevASfiltersList = null;

    //get VQSR annotations from previous run of ApplyRecalibration, if applicable
    if(foundINDELTranches || foundSNPTranches) {
        final String prevCulprits = vc.getAttributeAsString(GATKVCFConstants.AS_CULPRIT_KEY,"");
        prevCulpritList = prevCulprits.isEmpty()? new String[0] : prevCulprits.split(listPrintSeparator);
        final String prevLodString = vc.getAttributeAsString(GATKVCFConstants.AS_VQS_LOD_KEY,"");
        prevLodList = prevLodString.isEmpty()? new String[0] : prevLodString.split(listPrintSeparator);
        final String prevASfilters = vc.getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY,"");
        prevASfiltersList = prevASfilters.isEmpty()? new String[0] : prevASfilters.split(listPrintSeparator);
    }

    //for each allele in the current VariantContext
    for (int altIndex = 0; altIndex < vc.getNAlleles()-1; altIndex++) {
        final Allele allele = vc.getAlternateAllele(altIndex);

        //if the current allele is not part of this recalibration mode, add its annotations to the list and go to the next allele
        if (!VariantDataManager.checkVariationClass(vc, allele, MODE)) {
            updateAnnotationsWithoutRecalibrating(altIndex, prevCulpritList, prevLodList, prevASfiltersList, culpritStrings, lodStrings, AS_filterStrings);
            continue;
        }

        //if the current allele does need to have recalibration applied...

        //initialize allele-specific VQSR annotation data with values for spanning deletion
        String alleleLodString = emptyFloatValue;
        String alleleFilterString = emptyStringValue;
        String alleleCulpritString = emptyStringValue;

        //if it's not a spanning deletion, replace those allele strings with the real values
        if (!GATKVCFConstants.isSpanningDeletion(allele)) {
            VariantContext recalDatum = getMatchingRecalVC(vc, recals, allele);
            if (recalDatum == null) {
                throw new UserException("Encountered input allele which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants with flag -AS. First seen at: " + vc);
            }

            //compare VQSLODs for all alleles in the current mode for filtering later
            final double lod = recalDatum.getAttributeAsDouble(GATKVCFConstants.VQS_LOD_KEY, VariantRecalibratorEngine.MIN_ACCEPTABLE_LOD_SCORE);
            if (lod > bestLod)
                bestLod = lod;

            alleleLodString = String.format("%.4f", lod);
            alleleFilterString = generateFilterString(lod);
            alleleCulpritString = recalDatum.getAttributeAsString(GATKVCFConstants.CULPRIT_KEY, ".");

            if(recalDatum != null) {
                if (recalDatum.hasAttribute(GATKVCFConstants.POSITIVE_LABEL_KEY))
                    builder.attribute(GATKVCFConstants.POSITIVE_LABEL_KEY, true);
                if (recalDatum.hasAttribute(GATKVCFConstants.NEGATIVE_LABEL_KEY))
                    builder.attribute(GATKVCFConstants.NEGATIVE_LABEL_KEY, true);
            }
        }

        //append per-allele VQSR annotations
        lodStrings.add(alleleLodString);
        AS_filterStrings.add(alleleFilterString);
        culpritStrings.add(alleleCulpritString);
    }

    // Annotate the new record with its VQSLOD, AS_FilterStatus, and the worst performing annotation
    if(!AS_filterStrings.isEmpty() )
        builder.attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, AnnotationUtils.encodeStringList(AS_filterStrings));
    if(!lodStrings.isEmpty())
        builder.attribute(GATKVCFConstants.AS_VQS_LOD_KEY, AnnotationUtils.encodeStringList(lodStrings));
    if(!culpritStrings.isEmpty())
        builder.attribute(GATKVCFConstants.AS_CULPRIT_KEY, AnnotationUtils.encodeStringList(culpritStrings));

    return generateFilterStringFromAlleles(vc, bestLod);
}
 
Example 9
Source File: CustomMafFuncotationCreator.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Create the MAF count fields (See {@link CustomMafFuncotationCreator#COUNT_FIELD_NAMES}) for a single pair in the given variant.
 *
 * If no samples can be linked to a tumor and normal combination, then blank values are generated for the count fields.
 * @param variant Never {@code null}
 * @param tnPair Never {@code null}
 * @return List of funcotations for the pair.  This list will usually be of length one, unless multiallelics are involved.
 *  Never {@code null}.  Field values can be blank:
 *  - if there are no AD or AF format fields
 *  - if the specified pair is not in the variant sample list.
 */
private static List<Funcotation> createCustomMafCountFields(final VariantContext variant, final TumorNormalPair tnPair) {
    Utils.nonNull(variant);
    Utils.nonNull(tnPair);
    final List<Funcotation> result = new ArrayList<>();

    for (int i = 0; i < variant.getAlternateAlleles().size(); i++) {
        final Allele allele = variant.getAlternateAllele(i);
        final String tumorSampleName = tnPair.getTumor();
        final String normalSampleName = tnPair.getNormal();

        // This code assumes that the AD field is of Type "R"
        final Genotype tumorGenotype = variant.getGenotype(tumorSampleName);
        final boolean hasTumorAD = (tumorGenotype != null) && (tumorGenotype.hasAD());
        final boolean hasTumorAF = (tumorGenotype != null) && (tumorGenotype.hasAnyAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY));

        // Just make a quick check that the tumor genotype has allelic depth for both a ref and alt
        // This is REQUIRED, but can be missing and our validations seem to miss it:
        if ( hasTumorAD && tumorGenotype.getAD().length < 2 ) {
            throw new UserException("Allelic Depth (AD field) for Variant[" +
                    variant.getContig() + ":" + variant.getStart() + "_" +
                    variant.getReference().toString() + "->" + variant.getAlternateAlleles().get(0).toString() + ']' +
                    " Does not contain both a REF and an ALT value (only one value is present)!");
        }

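        // AD is declared as Type "R": index 0 is the REF depth and index i + 1 is the depth of the i-th ALT allele.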
        final String tAltCount = hasTumorAD ? Integer.toString(tumorGenotype.getAD()[i + 1]) : "";
        final String tRefCount = hasTumorAD ? Integer.toString(tumorGenotype.getAD()[0]) : "";
        final String tumorFAllAlleles = hasTumorAF ? tumorGenotype.getAnyAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY).toString() : "";
        final String tumorF = hasTumorAF ? StringUtils.split(tumorFAllAlleles, ",")[i] : "";

        final Genotype normalGenotype = variant.getGenotype(normalSampleName);
        final boolean hasNormalAD = (normalGenotype != null) && (normalGenotype.hasAD());
        final String nAltCount = hasNormalAD ? Integer.toString(normalGenotype.getAD()[i + 1]) : "";
        final String nRefCount = hasNormalAD ? Integer.toString(normalGenotype.getAD()[0]) : "";

        final List<String> fieldValues = Arrays.asList(
                tAltCount,
                tRefCount,
                nAltCount,
                nRefCount,
                tumorF);

        result.add(TableFuncotation.create(COUNT_FIELD_NAMES, fieldValues, allele, MafOutputRendererConstants.MAF_COUNT_RENDERING_DATASOURCE_DUMMY_NAME, createCustomMafCountFieldsMetadata()));
    }

    return result;
}
 
Example 10
Source File: VcfFuncotationFactory.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * {@inheritDoc}
 *
 * This is really the only entry point for the Vcf FuncotationFactory.
 *
 * {@link VcfFuncotationFactory} can be used with or without Gencode annotations.
 */
@Override
protected List<Funcotation> createFuncotationsOnVariant(final VariantContext variant, final ReferenceContext referenceContext, final List<Feature> featureList) {

    final List<Funcotation> outputFuncotations = new ArrayList<>();

    // TODO: Caching logic can be refactored and shared in other funcotation factories:  https://github.com/broadinstitute/gatk/issues/4974
    final Triple<VariantContext, ReferenceContext, List<Feature>> cacheKey = createCacheKey(variant, referenceContext, featureList);
    final List<Funcotation> cacheResult = cache.get(cacheKey);
    if (cacheResult != null) {
        cacheHits++;
        return cacheResult;
    }

    // Only create annotations if we have data to annotate:
    if ( supportedFieldNames.size() != 0 ) {

        // Get rid of any null features.
        // By this point we know the feature type is correct, so we cast it:
        final List<VariantContext> featureVariantList = featureList.stream().filter(f -> f != null)
                .map(f -> (VariantContext) f).collect(Collectors.toList());

        // Create a map that will keep the final outputs.  Default it to default funcotations for each alt allele in the
        //  query variant.
        final Map<Allele, Funcotation> outputOrderedMap = new LinkedHashMap<>();

        for ( final VariantContext featureVariant : featureVariantList ) {

            // The featureVariantList already overlaps the query variant in position, now get which
            //  match in ref/alt as well.  And make sure to handle multiallelics in both the query variant and the
            //  funcotation factory variant.
            // matchIndices length will always be the same as the number of alt alleles in the variant (first parameter)
            //  Note that this is not the same length as the featureVariant.
            final int[] matchIndices = GATKVariantContextUtils.matchAllelesOnly(variant, featureVariant);

            for (int i = 0; i < matchIndices.length; i++) {
                final int matchIndex = matchIndices[i];
                final Allele queryAltAllele = variant.getAlternateAllele(i);
                if (matchIndex != -1) {

                    final LinkedHashMap<String, String> annotations = new LinkedHashMap<>(supportedFieldNamesAndDefaults);

                    for (final Map.Entry<String, Object> entry : featureVariant.getAttributes().entrySet()) {
                        populateAnnotationMap(featureVariant, variant, matchIndex, annotations, entry);
                    }

                    // Add the ID of the variant:
                    annotations.put(createFinalFieldName(name, ID_FIELD_NAME), featureVariant.getID());

                    // Add the FILTER status of the variant:
                    annotations.put(createFinalFieldName(name, FILTER_FIELD_NAME), featureVariant.getFilters().stream().collect(Collectors.joining(";")));

                    final TableFuncotation newFuncotation = TableFuncotation.create(annotations, queryAltAllele, name, supportedFieldMetadata);
                    outputOrderedMap.merge(queryAltAllele, newFuncotation, VcfFuncotationFactory::mergeDuplicateFuncotationFactoryVariant);
                }
            }
        }
        variant.getAlternateAlleles().forEach(a -> outputFuncotations.add(outputOrderedMap.computeIfAbsent(a, allele -> createDefaultFuncotation(allele))));
    }
    cacheMisses++;
    cache.put(cacheKey, outputFuncotations);

    // The number of output funcotations should equal variant.getAlternateAlleles().size()
    return outputFuncotations;
}