htsjdk.variant.variantcontext.GenotypeBuilder Java Examples

The following examples show how to use htsjdk.variant.variantcontext.GenotypeBuilder. They are drawn from several open-source projects; the source file, project, and license are noted above each example.
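Before the project-specific examples, here is a minimal self-contained sketch of the basic GenotypeBuilder pattern: construct the builder with a sample name, set per-sample fields such as alleles, DP, AD, GQ and PL, and call make() to obtain an immutable Genotype; the static GenotypeBuilder.create shortcut covers the common name-plus-alleles case. The sample names and numeric values below are made up for illustration only.

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;

import java.util.Arrays;
import java.util.List;

public class GenotypeBuilderSketch {
    public static void main(String[] args) {
        // Reference allele A and alternate allele C.
        final Allele ref = Allele.create("A", true);
        final Allele alt = Allele.create("C", false);
        final List<Allele> alleles = Arrays.asList(ref, alt);

        // Fluent builder: set per-sample VCF fields and build an immutable Genotype.
        final Genotype het = new GenotypeBuilder("sample1")
                .alleles(alleles)              // GT = A/C
                .DP(30)                        // read depth
                .AD(new int[]{18, 12})         // allele depths (ref, alt)
                .GQ(99)                        // genotype quality
                .PL(new int[]{99, 0, 120})     // phred-scaled likelihoods (AA, AC, CC)
                .make();

        // Static factory shortcut when only the sample name and alleles are needed.
        final Genotype homRef = GenotypeBuilder.create("sample2", Arrays.asList(ref, ref));

        System.out.println(het);
        System.out.println(homRef);
    }
}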
Example #1
Source File: AmberVCF.java    From hmftools with GNU General Public License v3.0
@NotNull
private VariantContext create(@NotNull final BaseDepth snp) {
    final List<Allele> alleles = Lists.newArrayList();
    alleles.add(Allele.create(snp.ref().toString(), true));
    alleles.add(Allele.create(snp.alt().toString(), false));

    final List<Integer> adField = Lists.newArrayList();
    adField.add(snp.refSupport());
    adField.add(snp.altSupport());

    final Genotype normal = new GenotypeBuilder(config.primaryReference()).DP(snp.readDepth())
            .AD(adField.stream().mapToInt(i -> i).toArray())
            .alleles(alleles)
            .make();

    final VariantContextBuilder builder = new VariantContextBuilder().chr(snp.chromosome())
            .start(snp.position())
            .computeEndFromAlleles(alleles, (int) snp.position())
            .genotypes(normal)
            .alleles(alleles);

    return builder.make();
}
 
Example #2
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License
private VariantContext buildVariantContextWithDroppedAnnotationsRemoved(final VariantContext vc) {
    if (infoAnnotationsToDrop.isEmpty() && genotypeAnnotationsToDrop.isEmpty()) {
        return vc;
    }
    final VariantContextBuilder rmAnnotationsBuilder = new VariantContextBuilder(vc);
    for (String infoField : infoAnnotationsToDrop) {
        rmAnnotationsBuilder.rmAttribute(infoField);
    }
    if (!genotypeAnnotationsToDrop.isEmpty()) {
        final ArrayList<Genotype> genotypesToWrite = new ArrayList<>();
        for (Genotype genotype : vc.getGenotypes()) {
            final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).noAttributes();
            final Map<String, Object> attributes = new HashMap<>(genotype.getExtendedAttributes());
            for (String genotypeAnnotation : genotypeAnnotationsToDrop) {
                attributes.remove(genotypeAnnotation);
            }
            genotypeBuilder.attributes(attributes);
            genotypesToWrite.add(genotypeBuilder.make());
        }
        rmAnnotationsBuilder.genotypes(GenotypesContext.create(genotypesToWrite));
    }
    return rmAnnotationsBuilder.make();
}
 
Example #3
Source File: EvaluateCopyNumberTriStateCalls.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
private Genotype buildAndAnnotateTruthOverlappingGenotype(final String sample, final VariantContext truth, final List<VariantContext> calls,
                                                          final TargetCollection<Target> targets) {
    final Genotype truthGenotype = truth.getGenotype(sample);
    // if there is no truth genotype for that sample, we output the "empty" genotype.
    if (truthGenotype == null) {
        return GenotypeBuilder.create(sample, Collections.emptyList());
    }
    final int truthCopyNumber = GATKProtectedVariantContextUtils.getAttributeAsInt(truthGenotype,
            GS_COPY_NUMBER_FORMAT_KEY, truthNeutralCopyNumber);
    final CopyNumberTriStateAllele truthAllele = copyNumberToTrueAllele(truthCopyNumber);

    final List<Pair<VariantContext, Genotype>> allCalls = calls.stream()
            .map(vc -> new ImmutablePair<>(vc, vc.getGenotype(sample)))
            .filter(pair -> pair.getRight() != null)
            .filter(pair -> GATKProtectedVariantContextUtils.getAttributeAsString(pair.getRight(), XHMMSegmentGenotyper.DISCOVERY_KEY,
                    XHMMSegmentGenotyper.DISCOVERY_FALSE).equals(XHMMSegmentGenotyper.DISCOVERY_TRUE))
            .collect(Collectors.toList());

    final List<Pair<VariantContext, Genotype>> qualifiedCalls = composeQualifyingCallsList(targets, allCalls);

    return buildAndAnnotateTruthOverlappingGenotype(sample, targets, truthGenotype, truthCopyNumber,
                truthAllele, qualifiedCalls);
}
 
Example #4
Source File: PerAlleleAnnotation.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
/**
 * Calculate annotations for each allele based on the given VariantContext and likelihoods for a given genotype's sample
 * and add the annotations to the GenotypeBuilder.  See parent class docs in {@link GenotypeAnnotation}.
 */
public void annotate(final ReferenceContext ref,
                     final VariantContext vc,
                     final Genotype g,
                     final GenotypeBuilder gb,
                     final ReadLikelihoods<Allele> likelihoods) {
    Utils.nonNull(gb);
    Utils.nonNull(vc);
    if ( g == null || likelihoods == null ) {
        return;
    }

    final Map<Allele, List<Integer>> values = likelihoods.alleles().stream()
            .collect(Collectors.toMap(a -> a, a -> new ArrayList<>()));

    Utils.stream(likelihoods.bestAlleles(g.getSampleName()))
            .filter(ba -> ba.isInformative() && isUsableRead(ba.read))
            .forEach(ba -> getValueForRead(ba.read, vc).ifPresent(v -> values.get(ba.allele).add(v)));

    final int[] statistics = vc.getAlleles().stream().mapToInt(a -> aggregate(values.get(a))).toArray();
    gb.attribute(getVcfKey(), statistics);
}
 
Example #5
Source File: DepthPerAlleleBySample.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
public void annotate(final ReferenceContext ref,
                     final VariantContext vc,
                     final Genotype g,
                     final GenotypeBuilder gb,
                     final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(gb, "gb is null");
    Utils.nonNull(vc, "vc is null");

    if ( g == null || !g.isCalled() || likelihoods == null) {
        return;
    }
    final Set<Allele> alleles = new LinkedHashSet<>(vc.getAlleles());

    // make sure that there's a meaningful relationship between the alleles in the likelihoods and our VariantContext
    Utils.validateArg(likelihoods.alleles().containsAll(alleles), () -> "VC alleles " + alleles + " not a subset of AlleleLikelihoods alleles " + likelihoods.alleles());

    gb.AD(annotateWithLikelihoods(vc, g, alleles, likelihoods));
}
 
Example #6
Source File: StrandBiasBySample.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
public void annotate(final ReferenceContext ref,
                     final VariantContext vc,
                     final Genotype g,
                     final GenotypeBuilder gb,
                     final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(vc);
    Utils.nonNull(g);
    Utils.nonNull(gb);

    if ( likelihoods == null || !g.isCalled() ) {
        logger.warn("Annotation will not be calculated, genotype is not called or alleleLikelihoodMap is null");
        return;
    }

    final int[][] table = FisherStrand.getContingencyTable(likelihoods, vc, 0, Arrays.asList(g.getSampleName()));

    gb.attribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY, getContingencyArray(table));
}
 
Example #7
Source File: HomRefBlock.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
Genotype createHomRefGenotype(final String sampleName, final boolean floorBlocks) {
    final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Collections.nCopies(getPloidy(), getRef()));
    gb.noAD().noPL().noAttributes(); // clear all attributes

    final int[] minPLs = getMinPLs();
    final int[] minPPs = getMinPPs();
    if (!floorBlocks) {
        gb.PL(minPLs);
        gb.GQ(GATKVariantContextUtils.calculateGQFromPLs(minPPs != null ? minPPs : minPLs));
        gb.attribute(GATKVCFConstants.MIN_DP_FORMAT_KEY, getMinDP());
    }
    else {
        gb.GQ(getGQLowerBound());
    }
    gb.DP(getMedianDP());
    if (minPPs != null) {
        gb.attribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY, Utils.listFromPrimitives(minPPs));
    }

    return gb.make();
}
 
Example #8
Source File: SageVariantContextFactory.java    From hmftools with GNU General Public License v3.0
@NotNull
private static Genotype createGenotype(boolean germline, @NotNull final VariantHotspot variant,
        @NotNull final ReadContextCounter counter) {
    return new GenotypeBuilder(counter.sample()).DP(counter.depth())
            .AD(new int[] { counter.refSupport(), counter.altSupport() })
            .attribute(READ_CONTEXT_QUALITY, counter.quality())
            .attribute(READ_CONTEXT_COUNT, counter.counts())
            .attribute(READ_CONTEXT_IMPROPER_PAIR, counter.improperPair())
            .attribute(READ_CONTEXT_JITTER, counter.jitter())
            .attribute(RAW_ALLELIC_DEPTH, new int[] { counter.rawRefSupport(), counter.rawAltSupport() })
            .attribute(RAW_ALLELIC_BASE_QUALITY, new int[] { counter.rawRefBaseQuality(), counter.rawAltBaseQuality() })
            .attribute(RAW_DEPTH, counter.rawDepth())
            .attribute(VCFConstants.ALLELE_FREQUENCY_KEY, counter.vaf())
            .alleles(createGenotypeAlleles(germline, variant, counter))
            .make();
}
 
Example #9
Source File: GenotypeConcordanceTest.java    From picard with MIT License
@Test
public void testGenotypeConcordanceDetermineStateGq() throws Exception {
    final List<Allele> allelesNormal = makeUniqueListOfAlleles(Aref, C);
    final Genotype gtNormal = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C));
    final VariantContext vcNormal = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, allelesNormal).genotypes(gtNormal).make();

    final List<Allele> allelesLowGq = makeUniqueListOfAlleles(Aref, C);
    final Genotype gtLowGq = new GenotypeBuilder(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)).GQ(4).make();
    final VariantContext vcLowGq = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, allelesLowGq).genotypes(gtLowGq).make();

    testGenotypeConcordanceDetermineState(vcLowGq, TruthState.LOW_GQ, vcNormal, CallState.HET_REF_VAR1, 20, 0);
    testGenotypeConcordanceDetermineState(vcLowGq, TruthState.HET_REF_VAR1, vcLowGq, CallState.HET_REF_VAR1, 2, 0);

    testGenotypeConcordanceDetermineState(vcNormal, TruthState.HET_REF_VAR1, vcLowGq, CallState.LOW_GQ, 20, 0);
    testGenotypeConcordanceDetermineState(vcNormal, TruthState.HET_REF_VAR1, vcLowGq, CallState.HET_REF_VAR1, 2, 0);

    testGenotypeConcordanceDetermineState(vcLowGq, TruthState.LOW_GQ, vcLowGq, CallState.LOW_GQ, 20, 0);
    testGenotypeConcordanceDetermineState(vcLowGq, TruthState.HET_REF_VAR1, vcLowGq, CallState.HET_REF_VAR1, 2, 0);
}
 
Example #10
Source File: GenotypeConcordanceTest.java    From picard with MIT License
@Test
public void testGenotypeConcordanceDetermineStateDp() throws Exception {
    final List<Allele> allelesNormal = makeUniqueListOfAlleles(Aref, C);
    final Genotype gtNormal = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C));
    final VariantContext vcNormal = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, allelesNormal).genotypes(gtNormal).make();

    final List<Allele> allelesLowDp = makeUniqueListOfAlleles(Aref, C);
    final Genotype gtLowDp = new GenotypeBuilder(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)).DP(4).make();
    final VariantContext vcLowDp = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, allelesLowDp).genotypes(gtLowDp).make();

    testGenotypeConcordanceDetermineState(vcLowDp, TruthState.LOW_DP, vcNormal, CallState.HET_REF_VAR1, 0, 20);
    testGenotypeConcordanceDetermineState(vcLowDp, TruthState.HET_REF_VAR1, vcLowDp, CallState.HET_REF_VAR1, 0, 2);

    testGenotypeConcordanceDetermineState(vcNormal, TruthState.HET_REF_VAR1, vcLowDp, CallState.LOW_DP, 0, 20);
    testGenotypeConcordanceDetermineState(vcNormal, TruthState.HET_REF_VAR1, vcLowDp, CallState.HET_REF_VAR1, 0, 2);

    testGenotypeConcordanceDetermineState(vcLowDp, TruthState.LOW_DP, vcLowDp, CallState.LOW_DP, 0, 20);
    testGenotypeConcordanceDetermineState(vcLowDp, TruthState.HET_REF_VAR1, vcLowDp, CallState.HET_REF_VAR1, 0, 2);
}
 
Example #11
Source File: HomRefBlockUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testToVariantContext(){
    final VariantContext vc = getVariantContext();
    final GenotypeBuilder gb = new GenotypeBuilder(SAMPLE_NAME, vc.getAlleles());
    final Genotype genotype1 = gb.GQ(15).DP(6).PL(new int[]{0, 10, 100}).make();
    final Genotype genotype2 = gb.GQ(17).DP(10).PL(new int[]{0, 5, 80}).make();

    final HomRefBlock block = getHomRefBlock(vc);
    block.add(vc.getEnd(), genotype1);
    block.add(vc.getEnd() + 1, genotype2);

    final VariantContext newVc = block.toVariantContext(SAMPLE_NAME, false);
    Assert.assertEquals(newVc.getGenotypes().size(), 1);
    final Genotype genotype = newVc.getGenotypes().get(0);
    Assert.assertEquals(genotype.getDP(),8); //dp should be median of the added DPs
    Assert.assertEquals(genotype.getGQ(), 5); //GQ should have been recalculated with the minPls
    Assert.assertTrue(genotype.getAlleles().stream().allMatch(a -> a.equals(REF)));
}
 
Example #12
Source File: StrelkaAllelicDepth.java    From hmftools with GNU General Public License v3.0
@NotNull
public static VariantContext enrich(@NotNull final VariantContext context) {
    if (!context.isIndel() && !context.isSNP() ) {
        return context;
    }

    final VariantContextBuilder contextBuilder = new VariantContextBuilder(context).noGenotypes();

    final List<Allele> alleles = context.getAlleles();
    final Function<Allele, String> alleleKey = alleleKey(context);

    final List<Genotype> updatedGenotypes = Lists.newArrayList();
    for (Genotype genotype : context.getGenotypes()) {
        if (!genotype.hasAD() && hasRequiredAttributes(genotype, alleles, alleleKey)) {
            final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).AD(readAD(genotype, alleles, alleleKey));
            updatedGenotypes.add(genotypeBuilder.make());
        } else {
            updatedGenotypes.add(genotype);
        }
    }

    return contextBuilder.genotypes(updatedGenotypes).make();
}
 
Example #13
Source File: AlleleFractionGenotypeAnnotationUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@DataProvider(name = "testUsingADDataProvider")
public Object[][] testUsingADDataProvider() {
    final Allele A = Allele.create("A", true);
    final Allele C = Allele.create("C");

    final List<Allele> AA = Arrays.asList(A, A);
    final List<Allele> AC = Arrays.asList(A, C);

    final Genotype gAC = new GenotypeBuilder("1", AC).AD(new int[]{5,5}).make();
    final Genotype gAA = new GenotypeBuilder("2", AA).AD(new int[]{10,0}).make();

    final VariantContext vc = new VariantContextBuilder("test", "20", 10, 10, AC).genotypes(Arrays.asList(gAA, gAC)).make();

    return new Object[][] {
            {vc, gAC, new double[]{0.5}},
            {vc, gAA, new double[]{0.0}}
        };
}
 
Example #14
Source File: VariantContextSingletonFilterTest.java    From Drop-seq with MIT License
@Test
public void filterOutHetSinglton() {
 VariantContextBuilder b = new VariantContextBuilder();
 Allele a1 = Allele.create("A", true);
 Allele a2 = Allele.create("T", false);

 final List<Allele> allelesHet = new ArrayList<>(Arrays.asList(a1,a2));
 final List<Allele> allelesRef = new ArrayList<>(Arrays.asList(a1,a1));
 final List<Allele> allelesAlt = new ArrayList<>(Arrays.asList(a2,a2));

 // this has a het singleton.
 Collection<Genotype> genotypes = new ArrayList<>();
 genotypes.add(GenotypeBuilder.create("donor1", allelesRef));
 genotypes.add(GenotypeBuilder.create("donor2", allelesHet));
 genotypes.add(GenotypeBuilder.create("donor3", allelesRef));

 VariantContext vc = b.alleles(allelesHet).start(1).stop(1).chr("1").genotypes(genotypes).make();
 Assert.assertNotNull(vc);

 Iterator<VariantContext> underlyingIterator = Collections.emptyIterator();

 VariantContextSingletonFilter f = new VariantContextSingletonFilter(underlyingIterator, true);
 boolean t1 = f.filterOut(vc);
 Assert.assertFalse(t1);

}
 
Example #15
Source File: VariantContextSingletonFilterTest.java    From Drop-seq with MIT License
@Test
public void filterOutHetSinglto32() {
 VariantContextBuilder b = new VariantContextBuilder();
 Allele a1 = Allele.create("A", true);
 Allele a2 = Allele.create("T", false);

 final List<Allele> allelesHet = new ArrayList<>(Arrays.asList(a1,a2));
 final List<Allele> allelesRef = new ArrayList<>(Arrays.asList(a1,a1));
 final List<Allele> allelesAlt = new ArrayList<>(Arrays.asList(a2,a2));

 // this does not have a het singleton because it has an alt.
 Collection<Genotype> genotypes = new ArrayList<>();
 genotypes.add(GenotypeBuilder.create("donor1", allelesRef));
 genotypes.add(GenotypeBuilder.create("donor2", allelesRef));
 genotypes.add(GenotypeBuilder.create("donor3", allelesAlt));

 VariantContext vc = b.alleles(allelesHet).start(1).stop(1).chr("1").genotypes(genotypes).make();
 Assert.assertNotNull(vc);

 Iterator<VariantContext> underlyingIterator = Collections.emptyIterator();

 VariantContextSingletonFilter f = new VariantContextSingletonFilter(underlyingIterator, true);
 boolean t1 = f.filterOut(vc);
 Assert.assertTrue(t1);

}
 
Example #16
Source File: VariantContextSingletonFilterTest.java    From Drop-seq with MIT License
@Test
public void filterOutHetSinglton2() {
 VariantContextBuilder b = new VariantContextBuilder();
 Allele a1 = Allele.create("A", true);
 Allele a2 = Allele.create("T", false);

 final List<Allele> allelesHet = new ArrayList<>(Arrays.asList(a1,a2));
 final List<Allele> allelesRef = new ArrayList<>(Arrays.asList(a1,a1));
 final List<Allele> allelesAlt = new ArrayList<>(Arrays.asList(a2,a2));

 // this does not have a het singleton.
 Collection<Genotype> genotypes = new ArrayList<>();
 genotypes.add(GenotypeBuilder.create("donor1", allelesRef));
 genotypes.add(GenotypeBuilder.create("donor2", allelesHet));
 genotypes.add(GenotypeBuilder.create("donor3", allelesHet));

 VariantContext vc = b.alleles(allelesHet).start(1).stop(1).chr("1").genotypes(genotypes).make();
 Assert.assertNotNull(vc);

 Iterator<VariantContext> underlyingIterator = Collections.emptyIterator();

 VariantContextSingletonFilter f = new VariantContextSingletonFilter(underlyingIterator, true);
 boolean t1 = f.filterOut(vc);
 Assert.assertTrue(t1);

}
 
Example #17
Source File: GenotypeConcordanceTest.java    From picard with MIT License
@Test
public void testGenotypeConcordanceDetermineStateNull() throws Exception {
    final List<Allele> alleles = makeUniqueListOfAlleles(Aref, C);
    final Genotype gt1 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C));
    final VariantContext vc1 = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(gt1).make();

    testGenotypeConcordanceDetermineState(null, TruthState.MISSING, null, CallState.MISSING, 0, 0);
    testGenotypeConcordanceDetermineState(vc1, TruthState.HET_REF_VAR1, null, CallState.MISSING, 0, 0);
    testGenotypeConcordanceDetermineState(null, TruthState.MISSING, vc1, CallState.HET_REF_VAR1, 0, 0);
}
 
Example #18
Source File: LazyBCFGenotypesContext.java    From Hadoop-BAM with MIT License
@Override public void setHeader(VCFHeader header) {
	genoFieldDecoders = new BCF2GenotypeFieldDecoders(header);
	fieldDict = BCF2Utils.makeDictionary(header);

	builders = new GenotypeBuilder[header.getNGenotypeSamples()];
	final List<String> genotypeSamples = header.getGenotypeSamples();
	for (int i = 0; i < builders.length; ++i)
		builders[i] = new GenotypeBuilder(genotypeSamples.get(i));

	sampleNamesInOrder = header.getSampleNamesInOrder();
	sampleNameToOffset = header.getSampleNameToOffset();
}
 
Example #19
Source File: TLODBlock.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
Genotype createHomRefGenotype(final String sampleName, final boolean floorBlock) {
    final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Collections.nCopies(2, getRef()));  //FIXME: for somatic stuff we output the genotype as diploid because that's familiar for human
    gb.noAD().noPL().noAttributes(); // clear all attributes

    gb.attribute(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY, minBlockLOD);
    gb.DP(getMedianDP());
    gb.attribute(GATKVCFConstants.MIN_DP_FORMAT_KEY, getMinDP());

    return gb.make();
}
 
Example #20
Source File: GenotypeConcordanceTest.java    From picard with MIT License
@Test
public void testGenotypeConcordanceDetermineStateFilter() throws Exception {
    final Set<String> filters = new HashSet<String>(Arrays.asList("BAD!"));

    // Filtering on the variant context
    final List<Allele> alleles1 = makeUniqueListOfAlleles(Aref, C);
    final Genotype gt1 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C));
    final VariantContext vcFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles1).genotypes(gt1).filters(filters).make();

    final List<Allele> alleles2 = makeUniqueListOfAlleles(Aref, T);
    final Genotype gt2 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, T));
    final VariantContext vcNotFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles2).genotypes(gt2).make();

    testGenotypeConcordanceDetermineState(vcFiltered, TruthState.VC_FILTERED, vcNotFiltered, CallState.HET_REF_VAR1, 0, 0);
    testGenotypeConcordanceDetermineState(vcNotFiltered, TruthState.HET_REF_VAR1, vcFiltered, CallState.VC_FILTERED, 0, 0);
    testGenotypeConcordanceDetermineState(vcFiltered, TruthState.VC_FILTERED, vcFiltered, CallState.VC_FILTERED, 0, 0);

    // Filtering on the genotype
    final List<String> gtFilters = new ArrayList<String>(Arrays.asList("WICKED"));
    final List<Allele> alleles3 = makeUniqueListOfAlleles(Aref, C);
    final Genotype gt3 = new GenotypeBuilder(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)).filters(gtFilters).make();
    final VariantContext vcGtFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles3).genotypes(gt3).make();

    testGenotypeConcordanceDetermineState(vcGtFiltered, TruthState.GT_FILTERED, vcNotFiltered, CallState.HET_REF_VAR1, 0, 0);
    testGenotypeConcordanceDetermineState(vcNotFiltered, TruthState.HET_REF_VAR1, vcGtFiltered, CallState.GT_FILTERED, 0, 0);
    testGenotypeConcordanceDetermineState(vcGtFiltered, TruthState.GT_FILTERED, vcGtFiltered, CallState.GT_FILTERED, 0, 0);
}
 
Example #21
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
 *
 * @param vc       the VariantContext record to subset
 * @param preserveAlleles should we trim constant sequence from the beginning and/or end of all alleles, or preserve it?
 * @param removeUnusedAlternates removes alternate alleles with AC=0
 * @return the subsetted VariantContext
 */
private VariantContext subsetRecord(final VariantContext vc, final boolean preserveAlleles, final boolean removeUnusedAlternates) {
    //subContextFromSamples() always decodes the vc, which is a fairly expensive operation.  Avoid if possible
    if (noSamplesSpecified && !removeUnusedAlternates) {
        return vc;
    }

    // strip out the alternate alleles that aren't being used
    final VariantContext sub = vc.subContextFromSamples(samples, removeUnusedAlternates);

    // If no subsetting happened, exit now
    if (sub.getNSamples() == vc.getNSamples() && sub.getNAlleles() == vc.getNAlleles()) {
        return vc;
    }

    // fix the PL and AD values if sub has fewer alleles than original vc and remove a fraction of the genotypes if needed
    final GenotypesContext oldGs = sub.getGenotypes();
    GenotypesContext newGC = sub.getNAlleles() == vc.getNAlleles() ? oldGs :
            AlleleSubsettingUtils.subsetAlleles(oldGs, 0, vc.getAlleles(), sub.getAlleles(), GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));

    if (fractionGenotypes > 0) {
        final List<Genotype> genotypes = newGC.stream().map(genotype -> randomGenotypes.nextDouble() > fractionGenotypes ? genotype :
                new GenotypeBuilder(genotype).alleles(getNoCallAlleles(genotype.getPloidy())).noGQ().make()).collect(Collectors.toList());
        newGC = GenotypesContext.create(new ArrayList<>(genotypes));
    }

    // since the VC has been subset (either by sample or allele), we need to strip out the MLE tags
    final VariantContextBuilder builder = new VariantContextBuilder(sub);
    builder.rmAttributes(Arrays.asList(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
    builder.genotypes(newGC);
    addAnnotations(builder, vc, sub.getSampleNames());
    final VariantContext subset = builder.make();

    return preserveAlleles? subset : GATKVariantContextUtils.trimAlleles(subset,true,true);
}
 
Example #22
Source File: DepthPerSampleHC.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
public void annotate( final ReferenceContext ref,
                      final VariantContext vc,
                      final Genotype g,
                      final GenotypeBuilder gb,
                      final AlleleLikelihoods<GATKRead, Allele> likelihoods ) {
    Utils.nonNull(vc);
    Utils.nonNull(g);
    Utils.nonNull(gb);

    if ( likelihoods == null || !g.isCalled() ) {
        logger.warn("Annotation will not be calculated, genotype is not called or alleleLikelihoodMap is null");
        return;
    }

    // check that there are reads
    final String sample = g.getSampleName();
    if (likelihoods.sampleEvidenceCount(likelihoods.indexOfSample(sample)) == 0) {
        gb.DP(0);
        return;
    }

    final Set<Allele> alleles = new LinkedHashSet<>(vc.getAlleles());

    // make sure that there's a meaningful relationship between the alleles in the likelihoods and our VariantContext
    if ( !likelihoods.alleles().containsAll(alleles) ) {
        logger.warn("VC alleles " + alleles + " not a strict subset of AlleleLikelihoods alleles " + likelihoods.alleles());
        return;
    }

    // the depth for the HC is the sum of the informative alleles at this site.  It's not perfect (as we cannot
    // differentiate between reads that align over the event but aren't informative vs. those that aren't even
    // close) but it's a pretty good proxy and it matches with the AD field (i.e., sum(AD) = DP).
    final Map<Allele, List<Allele>> alleleSubset = alleles.stream().collect(Collectors.toMap(a -> a, a -> Arrays.asList(a)));
    final AlleleLikelihoods<GATKRead, Allele> subsettedLikelihoods = likelihoods.marginalize(alleleSubset);
    final int depth = (int) subsettedLikelihoods.bestAllelesBreakingTies(sample).stream().filter(ba -> ba.isInformative()).count();
    gb.DP(depth);
}
 
Example #23
Source File: OrientationBiasReadCounts.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
public void annotate(final ReferenceContext refContext,
                              final VariantContext vc,
                              final Genotype g,
                              final GenotypeBuilder gb,
                              final AlleleLikelihoods<GATKRead, Allele> likelihoods){
    Utils.nonNull(gb, "gb is null");
    Utils.nonNull(vc, "vc is null");

    if (g == null || likelihoods == null) {
        return;
    }

    final Map<Allele, MutableInt> f1r2Counts = likelihoods.alleles().stream()
            .collect(Collectors.toMap(a -> a, a -> new MutableInt(0)));

    final Map<Allele, MutableInt> f2r1Counts = likelihoods.alleles().stream()
            .collect(Collectors.toMap(a -> a, a -> new MutableInt(0)));

    Utils.stream(likelihoods.bestAllelesBreakingTies(g.getSampleName()))
            .filter(ba -> ba.isInformative() && isUsableRead(ba.evidence) && BaseQualityRankSumTest.getReadBaseQuality(ba.evidence, vc).orElse(0) >= MINIMUM_BASE_QUALITY)
            .forEach(ba -> (ReadUtils.isF2R1(ba.evidence) ? f2r1Counts : f1r2Counts).get(ba.allele).increment());

    final int[] f1r2 = vc.getAlleles().stream().mapToInt(a -> f1r2Counts.get(a).intValue()).toArray();

    final int[] f2r1 = vc.getAlleles().stream().mapToInt(a -> f2r1Counts.get(a).intValue()).toArray();

    gb.attribute(GATKVCFConstants.F1R2_KEY, f1r2);
    gb.attribute(GATKVCFConstants.F2R1_KEY, f2r1);
}
 
Example #24
Source File: OrientationBiasReadCounts.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 *  Annotate the given variant context with the OxoG read count attributes, directly from the read pileup.
 *
 *  This method may be slow and should be considered EXPERIMENTAL, especially with regard to indels and complex/mixed
 *    variants.
 *
 * @param vc variant context for the genotype.  Necessary so that we can see all alleles.
 * @param gb genotype builder to put the annotations into.
 * @param readPileup pileup of the reads at this vc.  Note that this pileup does not have to match the
 *                   genotype.  In other words, this tool does not check that the pileup was generated from the
 *                   genotype sample.
 */
public static void annotateSingleVariant(final VariantContext vc, final GenotypeBuilder gb,
                                         final ReadPileup readPileup, int meanBaseQualityCutoff) {
    Utils.nonNull(gb, "gb is null");
    Utils.nonNull(vc, "vc is null");

    // Create a list of unique alleles
    final List<Allele> variantAllelesWithDupes = vc.getAlleles();
    final Set<Allele> alleleSet = new LinkedHashSet<>(variantAllelesWithDupes);
    final List<Allele> variantAlleles = new ArrayList<>(alleleSet);

    // Initialize the mappings
    final Map<Allele, MutableInt> f1r2Counts = variantAlleles.stream()
            .collect(Collectors.toMap(Function.identity(), a -> new MutableInt(0)));

    final Map<Allele, MutableInt> f2r1Counts = variantAlleles.stream()
            .collect(Collectors.toMap(Function.identity(), a -> new MutableInt(0)));

    final List<Allele> referenceAlleles = variantAlleles.stream().filter(a -> a.isReference() && !a.isSymbolic()).collect(Collectors.toList());
    final List<Allele> altAlleles = variantAlleles.stream().filter(a -> a.isNonReference() && !a.isSymbolic()).collect(Collectors.toList());

    if (referenceAlleles.size() != 1) {
        logger.warn("Number of reference alleles does not equal  for VC: " + vc);
    }

    // We MUST have exactly 1 non-symbolic reference allele and a read pileup,
    if ((referenceAlleles.size() == 1) && (readPileup != null) && !referenceAlleles.get(0).isSymbolic()) {
        final Allele referenceAllele = referenceAlleles.get(0);
        Utils.stream(readPileup)
                .filter(pe -> isUsableRead(pe.getRead()))
                .forEach(pe -> incrementCounts(pe, f1r2Counts, f2r1Counts, referenceAllele, altAlleles, meanBaseQualityCutoff));
    }

    final int[] f1r2 = variantAlleles.stream().mapToInt(a -> f1r2Counts.get(a).intValue()).toArray();

    final int[] f2r1 = variantAlleles.stream().mapToInt(a -> f2r1Counts.get(a).intValue()).toArray();

    gb.attribute(GATKVCFConstants.F1R2_KEY, f1r2);
    gb.attribute(GATKVCFConstants.F2R1_KEY, f2r1);
}
 
Example #25
Source File: VariantsSparkSinkUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test(dataProvider = "gvcfCases")
public void testEnableDisableGVCFWriting(boolean writeGvcf, String extension) throws IOException {
    List<VariantContext> vcs = new ArrayList<>();
    for(int i = 1; i <= 10; i++) {
        final Allele A = Allele.create("A", true);
        final VariantContext vc = new VariantContextBuilder("hand crafted", "1", i, i,
                                                            Arrays.asList(A, Allele.NON_REF_ALLELE))
                .genotypes(new GenotypeBuilder(SAMPLE).alleles(Arrays.asList(A, A)).DP(10).GQ(10).PL(new int[]{0, 60, 10}).make())
                .make();
        vcs.add(vc);
    }

    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final File output = createTempFile(outputFileName, extension);
    VariantsSparkSink.writeVariants(ctx, output.toString(), ctx.parallelize(vcs), getHeader(), writeGvcf, Arrays.asList(100), 2, 1, true, true);

    checkFileExtensionConsistentWithContents(output.toString(), true);

    new CommandLineProgramTest(){
        @Override
        public String getTestedToolName(){
            return IndexFeatureFile.class.getSimpleName();
        }
    }.runCommandLine(new String[]{"-I", output.getAbsolutePath()});

    final List<VariantContext> writtenVcs = readVariants(output.toString());
    //if we are actually writing a gvcf, all the variant blocks will be merged into a single homref block with
    Assert.assertEquals(writtenVcs.size(), writeGvcf ? 1 : 10);
    Assert.assertEquals(writtenVcs.stream().mapToInt(VariantContext::getStart).min().getAsInt(), 1);
    Assert.assertEquals(writtenVcs.stream().mapToInt(VariantContext::getEnd).max().getAsInt(), 10);

}
 
Example #26
Source File: HomRefBlockUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testMinMedian() {
    final VariantContext vc = getVariantContext();
    final HomRefBlock band = getHomRefBlock(vc);
    final GenotypeBuilder gb = new GenotypeBuilder(SAMPLE_NAME);
    gb.alleles(vc.getAlleles());

    int pos = band.getStart();
    band.add(pos++, gb.DP(10).GQ(11).PL(new int[]{0,11,100}).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    assertValues(band, 10, 10);

    band.add(pos++, gb.DP(11).GQ(10).PL(new int[]{0, 10, 100}).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    assertValues(band, 10, 11);

    band.add(pos++, gb.DP(12).GQ(12).PL(new int[]{0,12,100}).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    assertValues(band, 10, 11);

    band.add(pos++, gb.DP(13).GQ(15).PL(new int[]{0,15,100}).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    band.add(pos++, gb.DP(14).GQ(16).PL(new int[]{0,16,100}).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    band.add(pos++, gb.DP(15).GQ(17).PL(new int[]{0,17,100}).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    band.add(pos++, gb.DP(16).GQ(18).PL(new int[]{0,18,100}).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    assertValues(band, 10, 13);
    Assert.assertEquals(band.getSize(), pos - vc.getStart());
    Assert.assertEquals(band.getMinPLs(), new int[]{0, 10, 100});
}
 
Example #27
Source File: HaplotypeCallerIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static String keyForVariant( final VariantContext variant ) {
    Genotype genotype = variant.getGenotype(0);
    if (genotype.isPhased()) { // unphase it for comparisons, since we rely on comparing the genotype string below
        genotype = new GenotypeBuilder(genotype).phased(false).make();
    }
    return String.format("%s:%d-%d %s %s", variant.getContig(), variant.getStart(), variant.getEnd(),
            variant.getAlleles(), genotype.getGenotypeString(false));
}
 
Example #28
Source File: GenomicsDBImportIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static File createInputVCF(final String sampleName) {
    final String contig = "chr20";
    final SAMSequenceDictionary dict = new SAMSequenceDictionary(
            Collections.singletonList(new SAMSequenceRecord(contig, 64444167)));

    final VCFFormatHeaderLine formatField = new VCFFormatHeaderLine(SAMPLE_NAME_KEY, 1, VCFHeaderLineType.String,
                                                                    "the name of the sample this genotype came from");
    final Set<VCFHeaderLine> headerLines = new HashSet<>();
    headerLines.add(formatField);
    headerLines.add(new VCFFormatHeaderLine(ANOTHER_ATTRIBUTE_KEY, 1, VCFHeaderLineType.Integer, "Another value"));
    headerLines.add(VCFStandardHeaderLines.getFormatLine("GT"));

    final File out = createTempFile(sampleName +"_", ".vcf");
    try (final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(out.toPath(), dict, false,
                                                                                     Options.INDEX_ON_THE_FLY)) {
        final VCFHeader vcfHeader = new VCFHeader(headerLines, Collections.singleton(sampleName));
        vcfHeader.setSequenceDictionary(dict);
        writer.writeHeader(vcfHeader);
        final Allele Aref = Allele.create("A", true);
        final Allele C = Allele.create("C");
        final List<Allele> alleles = Arrays.asList(Aref, C);
        final VariantContext variant = new VariantContextBuilder("invented", contig, INTERVAL.get(0).getStart(), INTERVAL.get(0).getStart(), alleles)
                .genotypes(new GenotypeBuilder(sampleName, alleles).attribute(SAMPLE_NAME_KEY, sampleName)
                                   .attribute(ANOTHER_ATTRIBUTE_KEY, 10).make())
                .make();
        writer.add(variant);
        return out;
    }
}
 
Example #29
Source File: AllelicDepthTest.java    From hmftools with GNU General Public License v3.0
@Test
public void allelicDepthFromAd() {
    final Genotype genotype = new GenotypeBuilder("SAMPLE").AD(new int[] { 6, 4 }).make();
    final AllelicDepth victim = AllelicDepth.fromGenotype(genotype);
    assertEquals(10, victim.totalReadCount());
    assertEquals(0.4, victim.alleleFrequency(), 0.01);
}
 
Example #30
Source File: AllelicDepthTest.java    From hmftools with GNU General Public License v3.0
@Test
public void useADIfLargerThanDP() {
    final Genotype genotype = new GenotypeBuilder("SAMPLE").AD(new int[] { 6, 4 }).DP(5).make();
    final AllelicDepth victim = AllelicDepth.fromGenotype(genotype);
    assertEquals(10, victim.totalReadCount());
    assertEquals(0.4, victim.alleleFrequency(), 0.01);
}