htsjdk.variant.variantcontext.VariantContextBuilder Java Examples

The following examples show how to use htsjdk.variant.variantcontext.VariantContextBuilder. Each example is taken from an open-source project; the source file and license are noted above each snippet.
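Before the project-specific examples below, here is a minimal, self-contained sketch of the builder pattern they all share: set the source, contig, coordinates, and alleles, optionally chain attributes, filters, and genotypes, then call make() to obtain an immutable VariantContext. The contig name, sample name, and INFO key in this sketch are made up for illustration; only htsjdk classes and methods that also appear in the examples on this page are used.

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;

import java.util.Arrays;
import java.util.List;

public class VariantContextBuilderSketch {
    public static void main(String[] args) {
        // Hypothetical biallelic SNP at chr1:100 with ref A and alt C.
        final Allele ref = Allele.create("A", true);
        final Allele alt = Allele.create("C", false);
        final List<Allele> alleles = Arrays.asList(ref, alt);

        // One genotype for a made-up sample, built with the companion GenotypeBuilder.
        final Genotype genotype = new GenotypeBuilder("sample1", alleles).make();

        // Builder calls chain; make() produces an immutable VariantContext.
        final VariantContext vc = new VariantContextBuilder("example", "chr1", 100, 100, alleles)
                .attribute("MY_INFO_KEY", 42) // illustrative INFO attribute
                .passFilters()                // mark the site as PASS
                .genotypes(genotype)
                .make();

        System.out.println(vc);
    }
}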
Example #1
Source File: LeftAlignAndTrimVariantsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testLeftAlignWithVaryingMaxDistances() {

    final byte[] refSequence = new byte[200];
    Arrays.fill(refSequence, 0, 100, (byte) 'C');
    Arrays.fill(refSequence, 100, 200, (byte) 'A');

    final String contig = "1";
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, refSequence.length);
    final SimpleInterval interval = new SimpleInterval(contig, 1, refSequence.length);


    final ReferenceBases refBases = new ReferenceBases(refSequence, interval);
    final ReferenceDataSource referenceDataSource = new ReferenceMemorySource(refBases, header.getSequenceDictionary());
    final ReferenceContext referenceContext = new ReferenceContext(referenceDataSource, interval);

    final List<Allele> alleles = Arrays.asList(Allele.create("AA", true), Allele.create("A", false));
    for (int maxShift : new int[] {0, 1, 5, 10, 30}) {
        for (int start : new int[]{101, 105, 109, 110, 111, 115, 120, 130, 141}) {
            final VariantContext vc = new VariantContextBuilder("source", contig, start, start + 1, alleles).make();
            final VariantContext realignedV = LeftAlignAndTrimVariants.leftAlignAndTrim(vc, referenceContext, maxShift, true);

            Assert.assertEquals(realignedV.getStart(), Math.max(start - maxShift, 100));
        }
    }
}
 
Example #2
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License
private VariantContext buildVariantContextWithDroppedAnnotationsRemoved(final VariantContext vc) {
    if (infoAnnotationsToDrop.isEmpty() && genotypeAnnotationsToDrop.isEmpty()) {
        return vc;
    }
    final VariantContextBuilder rmAnnotationsBuilder = new VariantContextBuilder(vc);
    for (String infoField : infoAnnotationsToDrop) {
        rmAnnotationsBuilder.rmAttribute(infoField);
    }
    if (!genotypeAnnotationsToDrop.isEmpty()) {
        final ArrayList<Genotype> genotypesToWrite = new ArrayList<>();
        for (Genotype genotype : vc.getGenotypes()) {
            final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).noAttributes();
            final Map<String, Object> attributes = new HashMap<>(genotype.getExtendedAttributes());
            for (String genotypeAnnotation : genotypeAnnotationsToDrop) {
                attributes.remove(genotypeAnnotation);
            }
            genotypeBuilder.attributes(attributes);
            genotypesToWrite.add(genotypeBuilder.make());
        }
        rmAnnotationsBuilder.genotypes(GenotypesContext.create(genotypesToWrite));
    }
    return rmAnnotationsBuilder.make();
}
 
Example #3
Source File: CNNScoreVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) {
    String sv = scoreScan.nextLine();
    String[] scoredVariant = sv.split("\\t");

    if (variant.getContig().equals(scoredVariant[CONTIG_INDEX])
            && Integer.toString(variant.getStart()).equals(scoredVariant[POS_INDEX])
            && variant.getReference().getBaseString().equals(scoredVariant[REF_INDEX])
            && variant.getAlternateAlleles().toString().equals(scoredVariant[ALT_INDEX])) {

        final VariantContextBuilder builder = new VariantContextBuilder(variant);
        if (scoredVariant.length > KEY_INDEX) {
            builder.attribute(scoreKey, scoredVariant[KEY_INDEX]);
        }
        vcfWriter.add(builder.make());

    } else {
        String errorMsg = "Score file out of sync with original VCF. Score file has:" + sv;
        errorMsg += "\n But VCF has:" + variant.toStringWithoutGenotypes();
        throw new GATKException(errorMsg);
    }
}
 
Example #4
Source File: VariantDataManager.java    From gatk with BSD 3-Clause "New" or "Revised" License
public void writeOutRecalibrationTable(final VariantContextWriter recalWriter, final SAMSequenceDictionary seqDictionary) {
    // we need to sort in coordinate order to produce a valid VCF
    Collections.sort( data, VariantDatum.getComparator(seqDictionary) );

    // create dummy alleles to be used
    List<Allele> alleles = Arrays.asList(Allele.create("N", true), Allele.create("<VQSR>", false));

    for( final VariantDatum datum : data ) {
        if (VRAC.useASannotations)
            alleles = Arrays.asList(datum.referenceAllele, datum.alternateAllele); //use the alleles to distinguish between multiallelics in AS mode
        VariantContextBuilder builder = new VariantContextBuilder("VQSR", datum.loc.getContig(), datum.loc.getStart(), datum.loc.getEnd(), alleles);
        builder.attribute(VCFConstants.END_KEY, datum.loc.getEnd());
        builder.attribute(GATKVCFConstants.VQS_LOD_KEY, String.format("%.4f", datum.lod));
        builder.attribute(GATKVCFConstants.CULPRIT_KEY, (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL"));

        if ( datum.atTrainingSite ) builder.attribute(GATKVCFConstants.POSITIVE_LABEL_KEY, true);
        if ( datum.atAntiTrainingSite ) builder.attribute(GATKVCFConstants.NEGATIVE_LABEL_KEY, true);

        recalWriter.add(builder.make());
    }
}
 
Example #5
Source File: FilterVariantTranches.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) {
    final VariantContextBuilder builder = new VariantContextBuilder(variant);

    if (removeOldFilters) {
        builder.unfiltered();
    }

    if (variant.hasAttribute(infoKey)) {
        final double score = Double.parseDouble((String) variant.getAttribute(infoKey));
        if (variant.isSNP() && snpCutoffs.size() != 0 && isTrancheFiltered(score, snpCutoffs)) {
            builder.filter(filterStringFromScore(SNPString, score, snpTranches, snpCutoffs));
            filteredSnps++;
        } else if (variant.isIndel() && indelCutoffs.size() != 0 && isTrancheFiltered(score, indelCutoffs)) {
            builder.filter(filterStringFromScore(INDELString, score, indelTranches, indelCutoffs));
            filteredIndels++;
        }
    }

    if (builder.getFilters() == null || builder.getFilters().size() == 0){
        builder.passFilters();
    }
    
    vcfWriter.add(builder.make());
}
 
Example #6
Source File: VariantContextSingletonFilterTest.java    From Drop-seq with MIT License
@Test
public void filterOutHetSinglton2() {
    VariantContextBuilder b = new VariantContextBuilder();
    Allele a1 = Allele.create("A", true);
    Allele a2 = Allele.create("T", false);

    final List<Allele> allelesHet = new ArrayList<>(Arrays.asList(a1,a2));
    final List<Allele> allelesRef = new ArrayList<>(Arrays.asList(a1,a1));
    final List<Allele> allelesAlt = new ArrayList<>(Arrays.asList(a2,a2));

    // this does not have a het singleton.
    Collection<Genotype> genotypes = new ArrayList<>();
    genotypes.add(GenotypeBuilder.create("donor1", allelesRef));
    genotypes.add(GenotypeBuilder.create("donor2", allelesHet));
    genotypes.add(GenotypeBuilder.create("donor3", allelesHet));

    VariantContext vc = b.alleles(allelesHet).start(1).stop(1).chr("1").genotypes(genotypes).make();
    Assert.assertNotNull(vc);

    Iterator<VariantContext> underlyingIterator = Collections.emptyIterator();

    VariantContextSingletonFilter f = new VariantContextSingletonFilter(underlyingIterator, true);
    boolean t1 = f.filterOut(vc);
    Assert.assertTrue(t1);

}
 
Example #7
Source File: SegmentedCpxVariantSimpleVariantExtractor.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Convert a short duplication call, i.e. one whose duplicated range is < 50 bp, to an insertion call.
 */
@VisibleForTesting
static VariantContext postProcessConvertShortDupToIns(final VariantContext simple) {
    final String type = simple.getAttributeAsString(SVTYPE, "");
    if ( type.equals(SimpleSVType.SupportedType.DUP.name()) ) {
        final SimpleInterval duplicatedRegion = new SimpleInterval(simple.getAttributeAsString(DUP_REPEAT_UNIT_REF_SPAN, ""));
        if (duplicatedRegion.size() > EVENT_SIZE_THRESHOLD) {
            return simple;
        } else {
            return new VariantContextBuilder(simple)
                    .alleles(Arrays.asList(simple.getReference(), altSymbAlleleIns))
                    .rmAttribute(SVTYPE)
                    .attribute(SVTYPE, SimpleSVType.SupportedType.INS.name())
                    .make();
        }
    } else
        return simple;
}
 
Example #8
Source File: AnnotatedIntervalToSegmentVariantContextConverter.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Convert a segment (as annotated interval) to a {@link VariantContext}.  The variant context will have a ref allele
 *  of the starting base of the segment.  The end position will be in the attribute {@link VCFConstants#END_KEY}.
 *  Any remaining annotations in the annotated interval will be converted to string attributes.  The segment call
 *  will be the alternate allele.  Currently, the alternate allele will be one of neutral, amplification, or deletion.
 *
 * The variant context will use symbolic alleles for the calls (copy neutral, amplification, or deletion).
 * See {@link CalledCopyRatioSegment.Call} for insertions and deletions.
 *
 * @param segment Never {@code null}
 * @param referenceContext Never {@code null}
 * @return Never {@code null}
 */
public static VariantContext convert(final AnnotatedInterval segment, final ReferenceContext referenceContext) {
    Utils.nonNull(segment);
    Utils.nonNull(referenceContext);
    final SimpleInterval startPointInterval = new SimpleInterval(segment.getContig(), segment.getStart(), segment.getStart());
    final Allele refAlleleAtStart = Allele.create(referenceContext.getBases(startPointInterval), true);

    final CalledCopyRatioSegment.Call call = retrieveCall(segment);

    final VariantContextBuilder variantContextBuilder = new VariantContextBuilder().chr(segment.getContig())
            .start(segment.getStart())
            .stop(segment.getEnd())
            .alleles(Arrays.asList(
                    Allele.create(refAlleleAtStart, false),
                    convertActualSegCallToAllele(call)
            ))
            .attribute(VCFConstants.END_KEY, segment.getEnd());

    // Grab every field in the annotated interval and make it a string attribute
    segment.getAnnotations().keySet().stream().forEach(k -> variantContextBuilder.attribute(k, segment.getAnnotationValue(k)));

    return variantContextBuilder
            .make();
}
 
Example #9
Source File: CountNsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testDoesReadHaveN() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    final String contig = "chr1";

    final int pos = 100;
    final VariantContext vc = new VariantContextBuilder().chr(contig).start(pos).stop(pos).alleles("A","C").make();

    final GATKRead nonOverlappingUpstreamRead = ArtificialReadUtils.createArtificialRead(header, "read", 0, 96, new byte[] {'A','N','G'}, new byte[] {20,20,20}, "3M");
    Assert.assertFalse(CountNs.doesReadHaveN(nonOverlappingUpstreamRead, vc));

    final GATKRead nonOverlappingDownstreamRead = ArtificialReadUtils.createArtificialRead(header, "read", 0, 101, new byte[] {'A','N','G'}, new byte[] {20,20,20}, "3M");
    Assert.assertFalse(CountNs.doesReadHaveN(nonOverlappingDownstreamRead, vc));

    final GATKRead spanningDeletionRead = ArtificialReadUtils.createArtificialRead(header, "read", 0, 95, new byte[] {'N','N','N','N','N','N'}, new byte[] {20,20,20,20,20,20}, "3M10D3M");
    Assert.assertFalse(CountNs.doesReadHaveN(spanningDeletionRead, vc));

    final GATKRead notN = ArtificialReadUtils.createArtificialRead(header, "read", 0, 99, new byte[] {'A','C','G'}, new byte[] {20,20,20}, "3M");
    Assert.assertFalse(CountNs.doesReadHaveN(notN, vc));

    final GATKRead yesN = ArtificialReadUtils.createArtificialRead(header, "read", 0, 99, new byte[] {'A','N','G'}, new byte[] {20,20,20}, "3M");
    Assert.assertTrue(CountNs.doesReadHaveN(yesN, vc));

}
 
Example #10
Source File: SelectVariantsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test(dataProvider = "MaxMinIndelSize")
public void maxIndelSizeTest(final int size, final int otherSize, final int max, final int min, final String op) {

    final byte[] largerAllele = Utils.dupBytes((byte) 'A', size+1);
    final byte[] smallerAllele = Utils.dupBytes((byte) 'A', 1);

    final List<Allele> alleles = new ArrayList<>(2);
    final Allele ref = Allele.create(op.equals("I") ? smallerAllele : largerAllele, true);
    final Allele alt = Allele.create(op.equals("D") ? smallerAllele : largerAllele, false);
    alleles.add(ref);
    alleles.add(alt);

    final VariantContext vc = new VariantContextBuilder("test", "1", 10, 10 + ref.length() - 1, alleles).make();

    final boolean hasIndelTooLargeOrSmall = SelectVariants.containsIndelLargerOrSmallerThan(vc, max, min);
    Assert.assertEquals(hasIndelTooLargeOrSmall, size > max || size < min);
}
 
Example #11
Source File: SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Filters out variants by testing them against the provided
 * filter key and threshold.
 *
 * Variants with a value below the specified threshold (or a null value)
 * are filtered out, citing the given reason.
 *
 * @throws ClassCastException if the value corresponding to the provided key cannot be cast to a {@link Double}
 */
private static VariantContext applyFilters(final VariantContext variantContext,
                                           final List<StructuralVariantFilter> filters) {

    final Set<String> appliedFilters = new HashSet<>();
    for (final StructuralVariantFilter filter : filters) {
        if ( !filter.test(variantContext) )
            appliedFilters.add(filter.getName());
    }

    if (appliedFilters.isEmpty())
        return variantContext;
    else {
        return new VariantContextBuilder(variantContext).filters(appliedFilters).make();
    }
}
 
Example #12
Source File: AlleleFractionGenotypeAnnotationUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@DataProvider(name = "testUsingADDataProvider")
public Object[][] testUsingADDataProvider() {
    final Allele A = Allele.create("A", true);
    final Allele C = Allele.create("C");

    final List<Allele> AA = Arrays.asList(A, A);
    final List<Allele> AC = Arrays.asList(A, C);

    final Genotype gAC = new GenotypeBuilder("1", AC).AD(new int[]{5,5}).make();
    final Genotype gAA = new GenotypeBuilder("2", AA).AD(new int[]{10,0}).make();

    final VariantContext vc = new VariantContextBuilder("test", "20", 10, 10, AC).genotypes(Arrays.asList(gAA, gAC)).make();

    return new Object[][] {
            {vc, gAC, new double[]{0.5}},
            {vc, gAA, new double[]{0.0}}
        };
}
 
Example #13
Source File: CNVInputReader.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Loads an external CNV call list and returns the results in an SVIntervalTree. NB: the contig indices in the SVIntervalTree
 * are based on the sequence indices in the SAM header, _NOT_ the ReadMetadata (which we might not have access to at this
 * time).
 */
public static SVIntervalTree<VariantContext> loadCNVCalls(final String cnvCallsFile,
                                                           final SAMFileHeader headerForReads) {
    Utils.validate(cnvCallsFile != null, "Can't load null CNV calls file");
    try ( final FeatureDataSource<VariantContext> dataSource = new FeatureDataSource<>(cnvCallsFile, null, 0, null) ) {

        final String sampleId = SVUtils.getSampleId(headerForReads);
        validateCNVcallDataSource(headerForReads, sampleId, dataSource);

        final SVIntervalTree<VariantContext> cnvCallTree = new SVIntervalTree<>();
        Utils.stream(dataSource.iterator())
                .map(vc -> new VariantContextBuilder(vc).genotypes(vc.getGenotype(sampleId)).make()) // forces a decode of the genotype for serialization purposes
                .map(vc -> new Tuple2<>(new SVInterval(headerForReads.getSequenceIndex(vc.getContig()), vc.getStart(), vc.getEnd()),vc))
                .forEach(pv -> cnvCallTree.put(pv._1(), pv._2()));
        return cnvCallTree;
    }
}
 
Example #14
Source File: LiftoverVcfTest.java    From picard with MIT License
@Test(dataProvider = "leftAlignAllelesData")
public void testLeftAlignVariants(final VariantContext source, final ReferenceSequence reference, final VariantContext result) {
    VariantContextBuilder vcb = new VariantContextBuilder(source);

    LiftoverUtils.leftAlignVariant(vcb, source.getStart(), source.getEnd(), source.getAlleles(), reference);
    vcb.genotypes(LiftoverUtils.fixGenotypes(source.getGenotypes(), source.getAlleles(), vcb.getAlleles()));

    VcfTestUtils.assertEquals(vcb.make(), result);
}
 
Example #15
Source File: AssemblyBasedSVDiscoveryTestDataProvider.java    From gatk with BSD 3-Clause "New" or "Revised" License
static final VariantContextBuilder makeBND(final SimpleInterval upstreamLoc, final SimpleInterval dnstreamLoc,
                                           final Allele refAllele, final String insertedSeq, final String bndSubtypeString,
                                           final boolean forUpstreamLoc, final boolean refBaseFirst, final boolean bracketPointsLeft) {

    final String upstreamAltString = upstreamLoc.getContig() + ":" + upstreamLoc.getStart();
    final String dnstreamAltString = dnstreamLoc.getContig() + ":" + dnstreamLoc.getEnd();

    final Allele altAllele;
    if (refBaseFirst) {
        if (bracketPointsLeft)
            altAllele = Allele.create(refAllele.getBaseString() + insertedSeq + "]" + (forUpstreamLoc ? dnstreamAltString : upstreamAltString) + "]");
        else
            altAllele = Allele.create(refAllele.getBaseString() + insertedSeq + "[" + (forUpstreamLoc ? dnstreamAltString : upstreamAltString) + "[");
    } else {
        if (bracketPointsLeft)
            altAllele = Allele.create("]" + (forUpstreamLoc ? dnstreamAltString : upstreamAltString) + "]" + insertedSeq + refAllele.getBaseString());
        else
            altAllele = Allele.create("[" + (forUpstreamLoc ? dnstreamAltString : upstreamAltString) + "[" + insertedSeq + refAllele.getBaseString());
    }
    if (forUpstreamLoc) {
        return new VariantContextBuilder().chr(upstreamLoc.getContig()).start(upstreamLoc.getStart()).stop(upstreamLoc.getEnd())
                .alleles(Arrays.asList(refAllele, altAllele))
                .id(makeID(BREAKEND_STR + (bndSubtypeString.isEmpty() ? "" : INTERVAL_VARIANT_ID_FIELD_SEPARATOR + bndSubtypeString),
                        upstreamLoc.getContig(), upstreamLoc.getStart(), dnstreamLoc.getContig(), dnstreamLoc.getEnd(), "1"))
                .attribute(SVTYPE, BREAKEND_STR);
    } else {
        return new VariantContextBuilder().chr(dnstreamLoc.getContig()).start(dnstreamLoc.getStart()).stop(dnstreamLoc.getEnd())
                .alleles(Arrays.asList(refAllele, altAllele))
                .id(makeID(BREAKEND_STR + (bndSubtypeString.isEmpty() ? "" : INTERVAL_VARIANT_ID_FIELD_SEPARATOR + bndSubtypeString),
                        upstreamLoc.getContig(), upstreamLoc.getStart(), dnstreamLoc.getContig(), dnstreamLoc.getEnd(), "2"))
                .attribute(SVTYPE, BREAKEND_STR);
    }
}
 
Example #16
Source File: EventMap.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Create a block substitution out of two variant contexts that start at the same position
 *
 * vc1 can be a SNP, and vc2 can then be either an insertion or a deletion.
 * If vc1 is an indel, then vc2 must be the opposite type (vc1 deletion => vc2 must be an insertion)
 *
 * @param vc1 the first variant context we want to merge
 * @param vc2 the second
 * @return a block substitution that represents the composite substitution implied by vc1 and vc2
 */
protected VariantContext makeBlock(final VariantContext vc1, final VariantContext vc2) {
    Utils.validateArg( vc1.getStart() == vc2.getStart(), () -> "vc1 and 2 must have the same start but got " + vc1 + " and " + vc2);
    Utils.validateArg( vc1.isBiallelic(), "vc1 must be biallelic");
    if ( ! vc1.isSNP() ) {
        Utils.validateArg ( (vc1.isSimpleDeletion() && vc2.isSimpleInsertion()) || (vc1.isSimpleInsertion() && vc2.isSimpleDeletion()),
                () -> "Can only merge single insertion with deletion (or vice versa) but got " + vc1 + " merging with " + vc2);
    } else {
        Utils.validateArg(!vc2.isSNP(), () -> "vc1 is " + vc1 + " but vc2 is a SNP, which implies there's been some terrible bug in the cigar " + vc2);
    }

    final Allele ref, alt;
    final VariantContextBuilder b = new VariantContextBuilder(vc1);
    if ( vc1.isSNP() ) {
        // we have to repair the first base, so SNP case is special cased
        if ( vc1.getReference().equals(vc2.getReference()) ) {
            // we've got an insertion, so we just update the alt to have the prev alt
            ref = vc1.getReference();
            alt = Allele.create(vc1.getAlternateAllele(0).getDisplayString() + vc2.getAlternateAllele(0).getDisplayString().substring(1), false);
        } else {
            // we're dealing with a deletion, so we patch the ref
            ref = vc2.getReference();
            alt = vc1.getAlternateAllele(0);
            b.stop(vc2.getEnd());
        }
    } else {
        final VariantContext insertion = vc1.isSimpleInsertion() ? vc1 : vc2;
        final VariantContext deletion  = vc1.isSimpleInsertion() ? vc2 : vc1;
        ref = deletion.getReference();
        alt = insertion.getAlternateAllele(0);
        b.stop(deletion.getEnd());
    }

    return b.alleles(Arrays.asList(ref, alt)).make();
}
 
Example #17
Source File: AS_QualByDepthUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testParseQualList() {
    final int NON_REF_INDEX = 1; //finalized QD values won't have an entry for ref
    final String goodQualList = "|234|0"; //combined VCs from GenomicsDB have zero value for NON-REF
    final String trickyQualList = "|234|"; //older single-sample GVCFs don't have a value for NON-REF -- GenomicsDB assigns an empty value, which is fair
    final VariantContext withNonRefValue = new VariantContextBuilder(GATKVariantContextUtils.makeFromAlleles("good", "chr1", 10001, Arrays.asList("A","T", Allele.NON_REF_STRING))).
            attribute(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY, goodQualList).make();
    final VariantContext withNoNonRefValue = new VariantContextBuilder(GATKVariantContextUtils.makeFromAlleles("good", "chr1", 10001, Arrays.asList("A","T", Allele.NON_REF_STRING))).
            attribute(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY, trickyQualList).make();
    Assert.assertEquals(AS_QualByDepth.parseQualList(withNonRefValue).size(), withNonRefValue.getAlternateAlleles().size());
    final List<Integer> qualsFromNoNonRefList = AS_QualByDepth.parseQualList(withNoNonRefValue);
    Assert.assertEquals(qualsFromNoNonRefList.size(), withNonRefValue.getAlternateAlleles().size());
    Assert.assertEquals((int)qualsFromNoNonRefList.get(NON_REF_INDEX), 0);  //int cast because Integer results in ambiguous method call for assert
}
 
Example #18
Source File: ApplyVQSR.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext ref, final FeatureContext featureContext) {

    final List<VariantContext> recals =  featureContext.getValues(recal, vc.getStart());
    final boolean evaluateThisVariant = useASannotations || VariantDataManager.checkVariationClass( vc, MODE );

    //vc.isNotFiltered is true for PASS; vc.filtersHaveBeenApplied covers PASS and filters
    final boolean variantIsNotFiltered = IGNORE_ALL_FILTERS || vc.isNotFiltered() ||
            (!ignoreInputFilterSet.isEmpty() && ignoreInputFilterSet.containsAll(vc.getFilters()));

    if( evaluateThisVariant && variantIsNotFiltered) {
        String filterString;
        final VariantContextBuilder builder = new VariantContextBuilder(vc);
        if (!useASannotations) {
            filterString = doSiteSpecificFiltering(vc, recals, builder);
        }
        else {  //allele-specific mode
            filterString = doAlleleSpecificFiltering(vc, recals, builder);
        }

        //for both non-AS and AS modes:
        if( filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) {
            builder.passFilters();
        } else if(filterString.equals(VCFConstants.UNFILTERED)) {
            builder.unfiltered();
        } else {
            builder.filters(filterString);
        }

        final VariantContext outputVC = builder.make();
        if( !EXCLUDE_FILTERED || outputVC.isNotFiltered() ) {
            vcfWriter.add( outputVC );
        }
    } else { // valid VC but not compatible with this mode, so just emit the variant untouched
        vcfWriter.add( vc );
    }
}
 
Example #19
Source File: CalculateGenotypePosteriors.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
public void apply(final VariantContext variant,
                  final ReadsContext readsContext,
                  final ReferenceContext referenceContext,
                  final FeatureContext featureContext) {

    final Collection<VariantContext> otherVCs = featureContext.getValues(supportVariants);

    //If no resource contains a matching variant, then add numRefIfMissing as a pseudocount to the priors
    List<VariantContext> resourcesWithMatchingStarts = otherVCs.stream()
            .filter(vc -> variant.getStart() == vc.getStart()).collect(Collectors.toList());

    //do family priors first (if applicable)
    final VariantContextBuilder builder = new VariantContextBuilder(variant);
    //only compute family priors for biallelic sites
    if (!skipFamilyPriors && variant.isBiallelic()){
        final GenotypesContext gc = famUtils.calculatePosteriorGLs(variant);
        builder.genotypes(gc);
    }
    VariantContextUtils.calculateChromosomeCounts(builder, false);
    final VariantContext vc_familyPriors = builder.make();

    final VariantContext vc_bothPriors;
    if (!skipPopulationPriors) {
        vc_bothPriors = PosteriorProbabilitiesUtils.calculatePosteriorProbs(vc_familyPriors, resourcesWithMatchingStarts,
                resourcesWithMatchingStarts.isEmpty() ? numRefIfMissing : 0, options);
    } else {
        vc_bothPriors = vc_familyPriors;
    }
    vcfWriter.add(vc_bothPriors);
}
 
Example #20
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
 *
 * @param vc       the VariantContext record to subset
 * @param preserveAlleles should we trim constant sequence from the beginning and/or end of all alleles, or preserve it?
 * @param removeUnusedAlternates removes alternate alleles with AC=0
 * @return the subsetted VariantContext
 */
private VariantContext subsetRecord(final VariantContext vc, final boolean preserveAlleles, final boolean removeUnusedAlternates) {
    //subContextFromSamples() always decodes the vc, which is a fairly expensive operation.  Avoid if possible
    if (noSamplesSpecified && !removeUnusedAlternates) {
        return vc;
    }

    // strip out the alternate alleles that aren't being used
    final VariantContext sub = vc.subContextFromSamples(samples, removeUnusedAlternates);

    // If no subsetting happened, exit now
    if (sub.getNSamples() == vc.getNSamples() && sub.getNAlleles() == vc.getNAlleles()) {
        return vc;
    }

    // fix the PL and AD values if sub has fewer alleles than original vc and remove a fraction of the genotypes if needed
    final GenotypesContext oldGs = sub.getGenotypes();
    GenotypesContext newGC = sub.getNAlleles() == vc.getNAlleles() ? oldGs :
            AlleleSubsettingUtils.subsetAlleles(oldGs, 0, vc.getAlleles(), sub.getAlleles(), GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));

    if (fractionGenotypes > 0) {
        final List<Genotype> genotypes = newGC.stream().map(genotype -> randomGenotypes.nextDouble() > fractionGenotypes ? genotype :
                new GenotypeBuilder(genotype).alleles(getNoCallAlleles(genotype.getPloidy())).noGQ().make()).collect(Collectors.toList());
        newGC = GenotypesContext.create(new ArrayList<>(genotypes));
    }

    // since the VC has been subset (either by sample or allele), we need to strip out the MLE tags
    final VariantContextBuilder builder = new VariantContextBuilder(sub);
    builder.rmAttributes(Arrays.asList(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
    builder.genotypes(newGC);
    addAnnotations(builder, vc, sub.getSampleNames());
    final VariantContext subset = builder.make();

    return preserveAlleles? subset : GATKVariantContextUtils.trimAlleles(subset,true,true);
}
 
Example #21
Source File: FuncotatorTestUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * See {@link FuncotatorTestUtils#createDummySegmentVariantContext()}, but this allows caller to specify attributes
 *   to add.
 *
 * @param attributes Never {@code null}
 * @return Never {@code null}
 */
public static VariantContext createDummySegmentVariantContextWithAttributes(final Map<String, String> attributes) {
    Utils.nonNull(attributes);
    return new VariantContextBuilder(createSimpleVariantContext(FuncotatorReferenceTestUtils.retrieveHg19Chr3Ref(),
            "3", 2100000, 3200000, "T",
            AnnotatedIntervalToSegmentVariantContextConverter.COPY_NEUTRAL_ALLELE.getDisplayString()))
            .attributes(attributes).make();
}
 
Example #22
Source File: AlleleFilterUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Adds the new allele filters to the existing allele filters in the vc. Computes whether there are
 * new site filters and updates the site filter in the vc; if there are no site filters, the site is set to PASS.
 * Sets the filters for each allele and calculates the intersection of the allele filters to set on the variant,
 * which is PASS if the intersection is empty.
 * @param vc The variant context to add the filters to, both at the allele and site level
 * @param newAlleleFilters filters to be applied to each allele, the intersection of these filters are applied at the site level
 * @param invalidatePreviousFilters whether existing filters should be removed
 * @return The updated variant context
 */
public static VariantContext addAlleleAndSiteFilters(VariantContext vc, List<Set<String>> newAlleleFilters, boolean invalidatePreviousFilters) {
    if (newAlleleFilters.isEmpty()) {
        return vc;
    }
    List<List<String>> currentAlleleFilters = decodeASFilters(vc);
    if (!currentAlleleFilters.isEmpty() && newAlleleFilters.size() != currentAlleleFilters.size()) {
        // log an error
        return vc;
    }

    if (currentAlleleFilters.isEmpty() || invalidatePreviousFilters) {
        currentAlleleFilters = new ArrayList<>(Collections.nCopies(newAlleleFilters.size(), Collections.singletonList(GATKVCFConstants.SITE_LEVEL_FILTERS)));
    }
    ListIterator<List<String>> currentAlleleFiltersIt = currentAlleleFilters.listIterator();
    List<List<String>> updatedAlleleFilters = newAlleleFilters.stream().map(newfilters -> addAlleleFilters(currentAlleleFiltersIt.next(), newfilters.stream().collect(Collectors.toList()))).collect(Collectors.toList());
    String encodedFilters = encodeASFilters(updatedAlleleFilters);
    VariantContextBuilder vcb = new VariantContextBuilder(vc).attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, encodedFilters);

    if (invalidatePreviousFilters) {
        vcb.unfiltered();
    }
    Set<String> siteFiltersToAdd = newAlleleFilters.stream().skip(1)
            .collect(()->new HashSet<>(newAlleleFilters.get(0)), Set::retainAll, Set::retainAll);
    siteFiltersToAdd.stream().forEach(filter -> vcb.filter(filter));
    if ((vcb.getFilters() == null || vcb.getFilters().isEmpty()) && !invalidatePreviousFilters) {
        vcb.passFilters();
    }
    return vcb.make();
}
 
Example #23
Source File: LiftoverUtils.java    From picard with MIT License
protected static VariantContextBuilder reverseComplementVariantContext(final VariantContext source, final Interval target, final ReferenceSequence refSeq) {
    if (target.isPositiveStrand()) {
        throw new IllegalArgumentException("This should only be called for negative strand liftovers");
    }
    final int start = target.getStart();
    final int stop = target.getEnd();

    final List<Allele> origAlleles = new ArrayList<>(source.getAlleles());
    final VariantContextBuilder vcb = new VariantContextBuilder(source);

    vcb.rmAttribute(VCFConstants.END_KEY)
            .chr(target.getContig())
            .start(start)
            .stop(stop)
            .alleles(reverseComplementAlleles(origAlleles));

    if (isIndelForLiftover(source)) {
        // check that the reverse complemented bases match the new reference
        if (referenceAlleleDiffersFromReferenceForIndel(vcb.getAlleles(), refSeq, start, stop)) {
            return null;
        }
        leftAlignVariant(vcb, start, stop, vcb.getAlleles(), refSeq);
    }

    vcb.genotypes(fixGenotypes(source.getGenotypes(), origAlleles, vcb.getAlleles()));

    return vcb;
}
 
Example #24
Source File: LiftoverUtils.java    From picard with MIT License
protected static VariantContextBuilder liftSimpleVariantContext(VariantContext source, Interval target) {
    if (target == null || source.getReference().length() != target.length()) {
        return null;
    }

    // Build the new variant context.  Note: this will copy genotypes, annotations and filters
    final VariantContextBuilder builder = new VariantContextBuilder(source);
    builder.chr(target.getContig());
    builder.start(target.getStart());
    builder.stop(target.getEnd());

    return builder;
}
 
Example #25
Source File: SegmentedCpxVariantSimpleVariantExtractor.java    From gatk with BSD 3-Clause "New" or "Revised" License
private void extractDeletions( final ReferenceMultiSparkSource reference, final Set<SimpleInterval> missingSegments,
                               final List<VariantContextBuilder> result) {
    final List<VariantContextBuilder> deletions = compactifyMissingSegments(missingSegments).stream()
            .filter(gone -> gone.size() > EVENT_SIZE_THRESHOLD) // large enough
            .map(gone -> {
                final byte[] ref = getReferenceBases(SVUtils.makeOneBpInterval(gone.getContig(), gone.getStart()), reference);
                final Allele refAllele = Allele.create(ref, true);
                return makeDeletion(new SimpleInterval(gone.getContig(), gone.getStart(), gone.getEnd() - 1), refAllele);
            })
            .collect(Collectors.toList());
    result.addAll(deletions);
}
 
Example #26
Source File: ReadThreadingAssemblerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test(dataProvider = "AssembleIntervalsWithVariantData")
public void testAssembleRefAndInsertion(final ReadThreadingAssembler assembler, final SimpleInterval loc, final int nReadsToUse, final int variantSite) {
    final byte[] refBases = seq.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getEnd()).getBases();
    for ( int insertionLength = 1; insertionLength < 10; insertionLength++ ) {
        final Allele refBase = Allele.create(refBases[variantSite], false);
        final Allele altBase = Allele.create(new String(refBases).substring(variantSite, variantSite + insertionLength + 1), true);
        final VariantContextBuilder vcb = new VariantContextBuilder("x", loc.getContig(), variantSite, variantSite + insertionLength, Arrays.asList(refBase, altBase));
        testAssemblyWithVariant(assembler, refBases, loc, nReadsToUse, vcb.make());
    }
}
 
Example #27
Source File: LiftoverVcf.java    From picard with MIT License
/**
 * Utility function to attempt to add a variant. Checks that the reference allele still matches the reference (which may have changed).
 *
 * @param vc new {@link VariantContext}
 * @param refSeq {@link ReferenceSequence} of new reference
 * @param source the original {@link VariantContext} to use for putting the original location information into vc
 */
private void tryToAddVariant(final VariantContext vc, final ReferenceSequence refSeq, final VariantContext source) {
    if (!refSeq.getName().equals(vc.getContig())) {
        throw new IllegalStateException("The contig of the VariantContext, " + vc.getContig() + ", doesnt match the ReferenceSequence: " + refSeq.getName());
    }

    // Check that the reference allele still agrees with the reference sequence
    final boolean mismatchesReference;
    final Allele allele = vc.getReference();
    final byte[] ref = refSeq.getBases();
    final String refString = StringUtil.bytesToString(ref, vc.getStart() - 1, vc.getEnd() - vc.getStart() + 1);

    if (!refString.equalsIgnoreCase(allele.getBaseString())) {
        // consider that the ref and the alt may have been swapped in a simple biallelic SNP
        if (vc.isBiallelic() && vc.isSNP() && refString.equalsIgnoreCase(vc.getAlternateAllele(0).getBaseString())) {
            totalTrackedAsSwapRefAlt++;
            if (RECOVER_SWAPPED_REF_ALT) {
                addAndTrack(LiftoverUtils.swapRefAlt(vc, TAGS_TO_REVERSE, TAGS_TO_DROP), source);
                return;
            }
        }
        mismatchesReference = true;
    } else {
        mismatchesReference = false;
    }


    if (mismatchesReference) {
        rejectedRecords.add(new VariantContextBuilder(source)
                .filter(FILTER_MISMATCHING_REF_ALLELE)
                .attribute(ATTEMPTED_LOCUS, String.format("%s:%d-%d", vc.getContig(), vc.getStart(), vc.getEnd()))
                .attribute(ATTEMPTED_ALLELES, vc.getReference().toString() + "->" + String.join(",", vc.getAlternateAlleles().stream().map(Allele::toString).collect(Collectors.toList())))
                .make());
        failedAlleleCheck++;
        trackLiftedVariantContig(rejectsByContig, source.getContig());
    } else {
        addAndTrack(vc, source);
    }
}
 
Example #28
Source File: FuncotatorTestUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 *  Create a variant context with the following fields.  Note that the genotype will be empty, as will
 *  INFO annotations.
 *
 * @param reference E.g. {@link FuncotatorReferenceTestUtils#retrieveHg19Chr3Ref}
 * @param contig Never {@code null}
 * @param start Must be positive.
 * @param end Must be positive.
 * @param refAlleleString Valid {@link String} for an allele.  Do not include "*".  Never {@code null}
 * @param altAlleleString Valid {@link String} for an allele.  Do not include "*".  Never {@code null}
 * @param filterString Valid {@link String} for filters (See VCF 4.2 spec).  May be {@code null}.
 * @return a simple, biallelic variant context
 */
public static VariantContext createSimpleVariantContext(final String reference, final String contig,
                                                        final int start,
                                                        final int end,
                                                        final String refAlleleString,
                                                        final String altAlleleString,
                                                        final String filterString) {
    Utils.nonNull(contig);
    Utils.nonNull(refAlleleString);
    Utils.nonNull(altAlleleString);
    ParamUtils.isPositive(start, "Invalid start position: " + start);
    ParamUtils.isPositive(end, "Invalid end position: " + end);

    final Allele refAllele = Allele.create(refAlleleString, true);
    final Allele altAllele = Allele.create(altAlleleString);

    final VariantContextBuilder variantContextBuilder = new VariantContextBuilder(
            reference,
            contig,
            start,
            end,
            Arrays.asList(refAllele, altAllele)
    );

    if ( filterString != null ) {
        variantContextBuilder.filter(filterString);
    }
    else {
        variantContextBuilder.passFilters();
    }

    final VariantContext vc = variantContextBuilder.make();

    return vc;
}
 
Example #29
Source File: SegmentedCpxVariantSimpleVariantExtractor.java    From gatk with BSD 3-Clause "New" or "Revised" License
private void extractFrontAndRearInsertions(final VariantContext complexVC, final List<SimpleInterval> refSegmentIntervals,
                                           final List<String> altArrangement, final ReferenceMultiSparkSource reference,
                                           final List<VariantContextBuilder> result) {
    final List<Integer> refSegmentLengths = refSegmentIntervals.stream().map(SimpleInterval::size).collect(Collectors.toList());
    // index pointing to first appearance of ref segment (inverted or not) in altArrangement, from either side
    int firstRefSegmentIdx = 0; // first front
    for (final String description : altArrangement) {
        if ( descriptionIndicatesInsertion(description)) {
            ++firstRefSegmentIdx;
        } else {
            break;
        }
    }
    if (firstRefSegmentIdx > 0) {
        final SimpleInterval startAndStop = SVUtils.makeOneBpInterval(complexVC.getContig(), complexVC.getStart());
        final Allele anchorBaseRefAlleleFront = Allele.create(getReferenceBases(startAndStop, reference), true);
        final VariantContextBuilder frontIns = getInsFromOneEnd(true, firstRefSegmentIdx, startAndStop, anchorBaseRefAlleleFront, refSegmentLengths, altArrangement, true);
        if (frontIns != null) result.add( frontIns );
    }

    firstRefSegmentIdx = altArrangement.size() - 1; // then end
    for (int i = altArrangement.size() - 1; i > -1 ; --i) {
        if ( descriptionIndicatesInsertion(altArrangement.get(i))) {
            --firstRefSegmentIdx;
        } else {
            break;
        }
    }

    if (firstRefSegmentIdx != altArrangement.size() - 1) {
        final int pos = complexVC.getEnd();
        final SimpleInterval insertionPos = SVUtils.makeOneBpInterval(complexVC.getContig(), pos);
        final Allele anchorBaseRefAlleleRear = Allele.create(getReferenceBases(insertionPos, reference), true);
        final VariantContextBuilder rearIns = getInsFromOneEnd(false, firstRefSegmentIdx, insertionPos, anchorBaseRefAlleleRear, refSegmentLengths, altArrangement, true);
        if (rearIns != null) result.add( rearIns );
    }
}
 
Example #30
Source File: GenomicsDBImportIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static File createInputVCF(final String sampleName) {
    final String contig = "chr20";
    final SAMSequenceDictionary dict = new SAMSequenceDictionary(
            Collections.singletonList(new SAMSequenceRecord(contig, 64444167)));

    final VCFFormatHeaderLine formatField = new VCFFormatHeaderLine(SAMPLE_NAME_KEY, 1, VCFHeaderLineType.String,
                                                                    "the name of the sample this genotype came from");
    final Set<VCFHeaderLine> headerLines = new HashSet<>();
    headerLines.add(formatField);
    headerLines.add(new VCFFormatHeaderLine(ANOTHER_ATTRIBUTE_KEY, 1, VCFHeaderLineType.Integer, "Another value"));
    headerLines.add(VCFStandardHeaderLines.getFormatLine("GT"));

    final File out = createTempFile(sampleName +"_", ".vcf");
    try (final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(out.toPath(), dict, false,
                                                                                     Options.INDEX_ON_THE_FLY)) {
        final VCFHeader vcfHeader = new VCFHeader(headerLines, Collections.singleton(sampleName));
        vcfHeader.setSequenceDictionary(dict);
        writer.writeHeader(vcfHeader);
        final Allele Aref = Allele.create("A", true);
        final Allele C = Allele.create("C");
        final List<Allele> alleles = Arrays.asList(Aref, C);
        final VariantContext variant = new VariantContextBuilder("invented", contig, INTERVAL.get(0).getStart(), INTERVAL.get(0).getStart(), alleles)
                .genotypes(new GenotypeBuilder(sampleName, alleles).attribute(SAMPLE_NAME_KEY, sampleName)
                                   .attribute(ANOTHER_ATTRIBUTE_KEY, 10).make())
                .make();
        writer.add(variant);
        return out;
    }
}