Java Code Examples for htsjdk.variant.variantcontext.VariantContext#getGenotype()

The following examples show how to use htsjdk.variant.variantcontext.VariantContext#getGenotype() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: OrientationBiasFiltererUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testAnnotateVariantContextWithPreprocessingValues() {
    final FeatureDataSource<VariantContext> featureDataSource = new FeatureDataSource<>(new File(smallM2Vcf));
    SortedSet<Transition> relevantTransitions = new TreeSet<>();
    relevantTransitions.add(Transition.transitionOf('G', 'T'));

    final Map<Transition, Double> preAdapterQFakeScoreMap = new HashMap<>();
    final double amGTPreAdapterQ = 20.0;
    preAdapterQFakeScoreMap.put(Transition.transitionOf('G', 'T'), amGTPreAdapterQ);  // preAdapterQ suppression will do nothing.

    for (final VariantContext vc : featureDataSource) {
        final VariantContext updatedVariantContext = OrientationBiasFilterer.annotateVariantContextWithPreprocessingValues(vc, relevantTransitions, preAdapterQFakeScoreMap);

        final Genotype genotypeTumor = updatedVariantContext.getGenotype("TUMOR");
        final Genotype genotypeNormal = updatedVariantContext.getGenotype("NORMAL");

        Assert.assertNotEquals(genotypeTumor.getExtendedAttribute(OrientationBiasFilterConstants.FOB, VCFConstants.EMPTY_ALLELE), VCFConstants.EMPTY_ALLELE);
        Assert.assertNotEquals(genotypeTumor.getExtendedAttribute(OrientationBiasFilterConstants.P_ARTIFACT_FIELD_NAME, VCFConstants.EMPTY_ALLELE), VCFConstants.EMPTY_ALLELE);

        assertArtifact(amGTPreAdapterQ, genotypeTumor, Transition.transitionOf('G', 'T'));

        // The NORMAL is always ref/ref in the example file.
        assertNormal(genotypeNormal);
    }
}
 
Example 2
Source File: Mutect2IntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
@SuppressWarnings("deprecation")
public void testAFAtHighDP()  {
    Utils.resetRandomGenerator();
    final File unfilteredVcf = createTempFile("unfiltered", ".vcf");

    runMutect2(DEEP_MITO_BAM, unfilteredVcf, "chrM:1-1018", MITO_REF.getAbsolutePath(), Optional.empty(),
            args -> args.add(IntervalArgumentCollection.INTERVAL_PADDING_LONG_NAME, 300));

    final List<VariantContext> variants = VariantContextTestUtils.streamVcf(unfilteredVcf).collect(Collectors.toList());

    for (final VariantContext vc : variants) {
        Assert.assertTrue(vc.isBiallelic()); //I do some lazy parsing below that won't hold for multiple alternate alleles
        final Genotype g = vc.getGenotype(DEEP_MITO_SAMPLE_NAME);
        Assert.assertTrue(g.hasAD());
        final int[] ADs = g.getAD();
        Assert.assertTrue(g.hasExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY));
        //Assert.assertEquals(Double.parseDouble(String.valueOf(vc.getGenotype(DEEP_MITO_SAMPLE_NAME).getExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY,"0"))), (double)ADs[1]/(ADs[0]+ADs[1]), 1e-6);
        Assert.assertEquals(Double.parseDouble(String.valueOf(vc.getGenotype(DEEP_MITO_SAMPLE_NAME).getAttributeAsString(GATKVCFConstants.ALLELE_FRACTION_KEY, "0"))), (double) ADs[1] / (ADs[0] + ADs[1]), 2e-3);
    }
}
 
Example 3
Source File: Mutect2FilteringEngine.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void applyStrandArtifactFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final Collection<String> filters) {
    Genotype tumorGenotype = vc.getGenotype(tumorSample);
    final double[] posteriorProbabilities = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(
            tumorGenotype, (StrandArtifact.POSTERIOR_PROBABILITIES_KEY), () -> null, -1);
    final double[] mapAlleleFractionEstimates = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(
            tumorGenotype, (StrandArtifact.MAP_ALLELE_FRACTIONS_KEY), () -> null, -1);

    if (posteriorProbabilities == null || mapAlleleFractionEstimates == null){
        return;
    }

    final int maxZIndex = MathUtils.maxElementIndex(posteriorProbabilities);

    if (maxZIndex == StrandArtifact.StrandArtifactZ.NO_ARTIFACT.ordinal()){
        return;
    }

    if (posteriorProbabilities[maxZIndex] > MTFAC.STRAND_ARTIFACT_POSTERIOR_PROB_THRESHOLD &&
            mapAlleleFractionEstimates[maxZIndex] < MTFAC.STRAND_ARTIFACT_ALLELE_FRACTION_THRESHOLD){
        filters.add(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME);
    }
}
 
Example 4
Source File: PositiveAllelicDepth.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
private void run() {
    fileWriter.writeHeader(fileReader.getFileHeader());
    for (final VariantContext context : fileReader) {

        final VariantContextBuilder builder = new VariantContextBuilder(context);
        final List<Genotype> genotypes = Lists.newArrayList();

        for (int genotypeIndex = 0; genotypeIndex < context.getGenotypes().size(); genotypeIndex++) {
            Genotype genotype = context.getGenotype(genotypeIndex);

            if (genotype.hasAD() && genotype.hasDP()) {
                int[] ad = genotype.getAD();
                int total = 0;
                boolean updateRecord = false;
                for (int i = 0; i < ad.length; i++) {
                    int adValue = ad[i];
                    if (adValue < 0) {
                        updateRecord = true;
                        ad[i] = Math.abs(adValue);
                    }
                    total += Math.abs(adValue);
                }

                if (updateRecord) {
                    LOGGER.info("Updated entry at {}:{}", context.getContig(), context.getStart());
                    Genotype updated = new GenotypeBuilder(genotype).AD(ad).DP(total).make();
                    genotypes.add(updated);
                }
                else {
                    genotypes.add(genotype);
                }
            }
        }

        builder.genotypes(genotypes);
        fileWriter.add(builder.make());
    }
}
 
Example 5
Source File: PurityAdjustedSomaticVariantFactory.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@NotNull
public VariantContext enrich(@NotNull final VariantContext variant) {
    final Genotype genotype = variant.getGenotype(tumorSample);
    if (genotype != null && genotype.hasAD() && HumanChromosome.contains(variant.getContig())) {
        final GenomePosition position = GenomePositions.create(variant.getContig(), variant.getStart());
        final AllelicDepth depth = AllelicDepth.fromGenotype(genotype);
        enrich(position, depth, PurityAdjustedSomaticVariantBuilder.fromVariantContex(variant));
    }
    return variant;
}
 
Example 6
Source File: HaplotypeCallerIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testGVCFModeGenotypePosteriors() throws Exception {
    Utils.resetRandomGenerator();

    final String inputFileName = NA12878_20_21_WGS_bam;
    final String referenceFileName =b37_reference_20_21;

    final File output = createTempFile("testGVCFModeIsConsistentWithPastResults", ".g.vcf");

    final String[] args = {
            "-I", inputFileName,
            "-R", referenceFileName,
            "-L", "20:10000000-10100000",
            "-O", output.getAbsolutePath(),
            "--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, ReferenceConfidenceMode.GVCF.toString(),
            "--" + GenotypeCalculationArgumentCollection.SUPPORTING_CALLSET_LONG_NAME,
                largeFileTestDir + "1000G.phase3.broad.withGenotypes.chr20.10100000.vcf",
            "--" + GenotypeCalculationArgumentCollection.NUM_REF_SAMPLES_LONG_NAME, "2500",
            "-pairHMM", "AVX_LOGLESS_CACHING",
            "--" + AssemblyBasedCallerArgumentCollection.ALLELE_EXTENSION_LONG_NAME, "2",
            "--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false"
    };

    runCommandLine(args);

    Pair<VCFHeader, List<VariantContext>> results = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath());

    for (final VariantContext vc : results.getRight()) {
        final Genotype g = vc.getGenotype(0);
        if (g.hasDP() && g.getDP() > 0 && g.hasGQ() && g.getGQ() > 0) {
            Assert.assertTrue(g.hasExtendedAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY));
        }
        if (isGVCFReferenceBlock(vc) ) {
            Assert.assertTrue(!vc.hasAttribute(GATKVCFConstants.GENOTYPE_PRIOR_KEY));
        }
        else if (!vc.getAlternateAllele(0).equals(Allele.NON_REF_ALLELE)){      //there are some variants that don't have non-symbolic alts
            Assert.assertTrue(vc.hasAttribute(GATKVCFConstants.GENOTYPE_PRIOR_KEY));
        }
    }
}
 
Example 7
Source File: CreateSnpIntervalFromVcf.java    From Drop-seq with MIT License 5 votes vote down vote up
/** Are all the samples listed heterozygous for this variant, and pass GQ threshold?*/
private boolean sitePassesFilters(final VariantContext ctx, final Set<String> samples, final int GQThreshold, final boolean hetSNPsOnly) {

	boolean flag = true;
	for (String sample : samples) {
		Genotype g = ctx.getGenotype(sample);
		if (g.getGQ()<GQThreshold || (!g.isHet() && hetSNPsOnly))
		 return (false);
	}
	return (flag);
}
 
Example 8
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks if the two variants have the same genotypes for the selected samples
 *
 * @param vc the variant rod VariantContext.
 * @param compVCs the comparison VariantContext
 * @return true if VariantContexts are concordant, false otherwise
 */
private boolean isConcordant (final VariantContext vc, final Collection<VariantContext> compVCs) {
    if (vc == null || compVCs == null || compVCs.isEmpty()) {
        return false;
    }

    // if we're not looking for specific samples then the fact that we have both VCs is enough to call it concordant.
    if (noSamplesSpecified) {
        return true;
    }

    // make a list of all samples contained in this variant VC that are being tracked by the user command line arguments.
    final Set<String> variantSamples = vc.getSampleNames();
    variantSamples.retainAll(samples);

    // check if we can find all samples from the variant rod in the comp rod.
    for (final String sample : variantSamples) {
        boolean foundSample = false;
        for (final VariantContext compVC : compVCs) {
            final Genotype varG = vc.getGenotype(sample);
            final Genotype compG = compVC.getGenotype(sample);
            if (haveSameGenotypes(varG, compG)) {
                foundSample = true;
                break;
            }
        }
        // if at least one sample doesn't have the same genotype, we don't have concordance
        if (!foundSample) {
            return false;
        }
    }
    return true;
}
 
Example 9
Source File: HaplotypeCallerIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static String keyForVariant( final VariantContext variant ) {
    Genotype genotype = variant.getGenotype(0);
    if (genotype.isPhased()) { // unphase it for comparisons, since we rely on comparing the genotype string below
        genotype = new GenotypeBuilder(genotype).phased(false).make();
    }
    return String.format("%s:%d-%d %s %s", variant.getContig(), variant.getStart(), variant.getEnd(),
            variant.getAlleles(), genotype.getGenotypeString(false));
}
 
Example 10
Source File: XHMMSegmentGenotyperIntegrationTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private void assertVariantSegmentsAreCovered(final List<VariantContext> variants,
                                             final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> variantSegments) {
    for (final HiddenStateSegmentRecord<CopyNumberTriState, Target> variantSegment : variantSegments) {
        final Optional<VariantContext> match = variants.stream()
                .filter(vc -> new SimpleInterval(vc).equals(variantSegment.getSegment().getInterval()))
                .findFirst();
        Assert.assertTrue(match.isPresent());
        final VariantContext matchedVariant = match.get();
        final Genotype genotype = matchedVariant.getGenotype(variantSegment.getSampleName());
        final String discovery = genotype.getAnyAttribute(XHMMSegmentGenotyper.DISCOVERY_KEY).toString();
        Assert.assertTrue(discovery.equals(XHMMSegmentGenotyper.DISCOVERY_TRUE));
        final CopyNumberTriState call = variantSegment.getSegment().getCall();

        final List<Allele> gt = genotype.getAlleles();
        Assert.assertEquals(gt.size(), 1);
        // The call may not be the same for case where the event-quality is relatively low.
        if (variantSegment.getSegment().getEventQuality() > 10) {
            Assert.assertEquals(CopyNumberTriStateAllele.valueOf(gt.get(0)).state, call, genotype.toString());
        }
        final String[] SQ = genotype.getAnyAttribute(XHMMSegmentGenotyper.SOME_QUALITY_KEY).toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
        final double someQual = variantSegment.getSegment().getSomeQuality();
        Assert.assertEquals(Double.parseDouble(SQ[call == CopyNumberTriState.DELETION ? 0 : 1]), someQual, XHMMSegmentGenotyper.PHRED_SCORE_PRECISION, variantSegment.getSampleName() + " => " + genotype.toString());

        final String[] LQ = genotype.getAnyAttribute(XHMMSegmentGenotyper.START_QUALITY_KEY).toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
        final double startQuality = variantSegment.getSegment().getStartQuality();
        Assert.assertEquals(Double.parseDouble(LQ[call == CopyNumberTriState.DELETION ? 0 : 1]), startQuality, XHMMSegmentGenotyper.PHRED_SCORE_PRECISION, variantSegment.getSampleName() + " => " + genotype.toString());

        final String[] RQ = genotype.getAnyAttribute(XHMMSegmentGenotyper.END_QUALITY_KEY).toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
        final double endQuality = variantSegment.getSegment().getEndQuality();
        Assert.assertEquals(Double.parseDouble(RQ[call == CopyNumberTriState.DELETION ? 0 : 1]), endQuality, XHMMSegmentGenotyper.PHRED_SCORE_PRECISION, variantSegment.getSampleName() + " => " + genotype.toString());

        // Check the PL.
        final int[] PL = genotype.getPL();
        final int observedGQFromPL = Math.min(XHMMSegmentGenotyper.MAX_GQ, PL[CopyNumberTriStateAllele.REF.index()] - PL[CopyNumberTriStateAllele.valueOf(call).index()]);
        final double expectedCallPL = GATKProtectedMathUtils.roundPhred(QualityUtils.phredScaleErrorRate(QualityUtils.qualToProb(variantSegment.getSegment().getExactQuality())), HMMPostProcessor.PHRED_SCORE_PRECISION);
        final double expectedRefPL = GATKProtectedMathUtils.roundPhred(QualityUtils.phredScaleCorrectRate(QualityUtils.qualToProb(variantSegment.getSegment().getEventQuality())), HMMPostProcessor.PHRED_SCORE_PRECISION);
        final int expectedGQFromPL = Math.min(XHMMSegmentGenotyper.MAX_GQ, (int) Math.round(expectedRefPL - expectedCallPL));
        Assert.assertTrue(Math.abs(observedGQFromPL - expectedGQFromPL) <= 1, genotype.toString() + " " + variantSegment.getSegment().getEventQuality());
    }
}
 
Example 11
Source File: Mutect2FilteringEngine.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private void applyContaminationFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final Collection<String> filters) {
    final Genotype tumorGenotype = vc.getGenotype(tumorSample);
    final double[] alleleFractions = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(tumorGenotype, VCFConstants.ALLELE_FREQUENCY_KEY,
            () -> new double[] {1.0}, 1.0);
    final double maxFraction = MathUtils.arrayMax(alleleFractions);
    if (maxFraction < contamination) {
        filters.add(CONTAMINATION_FILTER_NAME);
    }
}
 
Example 12
Source File: MergePedIntoVcf.java    From picard with MIT License 4 votes vote down vote up
/**
 * Writes out the VariantContext objects in the order presented to the supplied output file
 * in VCF format.
 */
private void writeVcf(final CloseableIterator<VariantContext> variants,
                      final File output,
                      final SAMSequenceDictionary dict,
                      final VCFHeader vcfHeader, ZCallPedFile zCallPedFile) {

    try(VariantContextWriter writer = new VariantContextWriterBuilder()
            .setOutputFile(output)
            .setReferenceDictionary(dict)
            .setOptions(VariantContextWriterBuilder.DEFAULT_OPTIONS)
            .build()) {
        writer.writeHeader(vcfHeader);
        while (variants.hasNext()) {
            final VariantContext context = variants.next();

            final VariantContextBuilder builder = new VariantContextBuilder(context);
            if (zCallThresholds.containsKey(context.getID())) {
                final String[] zThresh = zCallThresholds.get(context.getID());
                builder.attribute(InfiniumVcfFields.ZTHRESH_X, zThresh[0]);
                builder.attribute(InfiniumVcfFields.ZTHRESH_Y, zThresh[1]);
            }
            final Genotype originalGenotype = context.getGenotype(0);
            final Map<String, Object> newAttributes = originalGenotype.getExtendedAttributes();
            final VCFEncoder vcfEncoder = new VCFEncoder(vcfHeader, false, false);
            final Map<Allele, String> alleleMap = vcfEncoder.buildAlleleStrings(context);

            final String zCallAlleles = zCallPedFile.getAlleles(context.getID());
            if (zCallAlleles == null) {
                throw new PicardException("No zCall alleles found for snp " + context.getID());
            }
            final List<Allele> zCallPedFileAlleles = buildNewAllelesFromZCall(zCallAlleles, context.getAttributes());
            newAttributes.put(InfiniumVcfFields.GTA, alleleMap.get(originalGenotype.getAllele(0)) + UNPHASED + alleleMap.get(originalGenotype.getAllele(1)));
            newAttributes.put(InfiniumVcfFields.GTZ, alleleMap.get(zCallPedFileAlleles.get(0)) + UNPHASED + alleleMap.get(zCallPedFileAlleles.get(1)));

            final Genotype newGenotype = GenotypeBuilder.create(originalGenotype.getSampleName(), zCallPedFileAlleles,
                    newAttributes);
            builder.genotypes(newGenotype);
            logger.record("0", 0);
            // AC, AF, and AN are recalculated here
            VariantContextUtils.calculateChromosomeCounts(builder, false);
            final VariantContext newContext = builder.make();
            writer.add(newContext);
        }
    }
}
 
Example 13
Source File: EvaluateCopyNumberTriStateCallsIntegrationTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private void checkOutputTruthConcordance(final File truthFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
    final List<VariantContext> truthVariants = readVCFFile(truthFile);
    final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
    final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
    final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);

    for (final VariantContext truth : truthVariants) {
        final List<Target> overlappingTargets = targets.targets(truth);
        final List<VariantContext> overlappingOutput = outputVariants.stream()
                .filter(vc -> new SimpleInterval(vc).overlaps(truth))
                .collect(Collectors.toList());

        if (overlappingTargets.isEmpty()) {
            Assert.assertTrue(overlappingOutput.isEmpty());
            continue;
        }
        Assert.assertFalse(overlappingOutput.isEmpty());
        Assert.assertEquals(overlappingOutput.stream().filter(vc -> new SimpleInterval(truth).equals(new SimpleInterval(vc))).count(), 1);
        @SuppressWarnings("all")
        final Optional<VariantContext> prospectiveMatchingOutput = overlappingOutput.stream()
                .filter(vc -> new SimpleInterval(truth).equals(new SimpleInterval(vc)))
                .findFirst();
        Assert.assertTrue(prospectiveMatchingOutput.isPresent());
        final VariantContext matchingOutput = prospectiveMatchingOutput.get();
        final int[] truthAC = calculateACFromTruth(truth);
        final long truthAN = MathUtils.sum(truthAC);
        final double[] truthAF = IntStream.of(Arrays.copyOfRange(truthAC, 1, truthAC.length)).mapToDouble(d -> d / (double) truthAN).toArray();
        Assert.assertEquals(matchingOutput.getAttributeAsInt(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, -1), truthAN);
        assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.TRUTH_ALLELE_FREQUENCY_KEY, () -> new double[2], 0.0),
                truthAF, 0.001);
        assertOutputVariantFilters(filteringOptions, overlappingTargets, matchingOutput, truthAF);
        for (final String sample : outputSamples) {
            final Genotype outputGenotype = matchingOutput.getGenotype(sample);
            final Genotype truthGenotype = truth.getGenotype(sample);
            final int truthCN = GATKProtectedVariantContextUtils.getAttributeAsInt(truthGenotype, ConvertGSVariantsToSegments.GS_COPY_NUMBER_FORMAT, -1);
            final int truthGT = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.TRUTH_GENOTYPE_KEY, -1);
            final Object truthQualObject = outputGenotype.getAnyAttribute(VariantEvaluationContext.TRUTH_QUALITY_KEY);
            Assert.assertNotNull(truthQualObject, "" + truthGenotype);
            final double truthQual = Double.parseDouble(String.valueOf(truthQualObject));
            if (truthQual < filteringOptions.minimumTruthSegmentQuality) {
                Assert.assertEquals(outputGenotype.getFilters(),EvaluationFilter.LowQuality.acronym);
            } else {
                Assert.assertTrue(outputGenotype.getFilters() == null || outputGenotype.getFilters().equals(VCFConstants.PASSES_FILTERS_v4));
            }
            final double[] truthPosteriors = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(truthGenotype, ConvertGSVariantsToSegments.GS_COPY_NUMBER_POSTERIOR,
                    () -> new double[0], Double.NEGATIVE_INFINITY);
            final double truthDelPosterior = MathUtils.log10SumLog10(truthPosteriors, 0, EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT);
            final double truthRefPosterior = truthPosteriors[EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT];
            final double truthDupPosterior = truthPosteriors.length < EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT ? Double.NEGATIVE_INFINITY : MathUtils.log10SumLog10(truthPosteriors, EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT + 1, truthPosteriors.length);

            final CopyNumberTriStateAllele truthAllele = truthGT == -1 ? null : CopyNumberTriStateAllele.ALL_ALLELES.get(truthGT);
            if (truthCN < EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT) {
                Assert.assertEquals(truthAllele, CopyNumberTriStateAllele.DEL);
                Assert.assertEquals(truthQual * -.1, MathUtils.log10SumLog10(new double[] {truthRefPosterior, truthDupPosterior}), 0.01);
            } else if (truthCN > EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT) {
                Assert.assertEquals(truthAllele, CopyNumberTriStateAllele.DUP);
                Assert.assertEquals(truthQual * -.1, MathUtils.log10SumLog10(new double[] {truthDelPosterior, truthRefPosterior}), 0.01, "" + truthGenotype + " " + outputGenotype);
            } else {
                Assert.assertEquals(truthAllele, CopyNumberTriStateAllele.REF);
                Assert.assertEquals(truthQual * -.1, MathUtils.log10SumLog10(new double[] {truthDelPosterior, truthDupPosterior}), 0.01);
            }
            final double outputTruthFraction = GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, -1);
            final double inputTruthFraction = GATKProtectedVariantContextUtils.getAttributeAsDouble(truthGenotype, ConvertGSVariantsToSegments.GS_COPY_NUMBER_FRACTION, -1);
            Assert.assertEquals(outputTruthFraction, inputTruthFraction, 0.01);
        }
    }
}
 
Example 14
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private void addAnnotations(final VariantContextBuilder builder, final VariantContext originalVC, final Set<String> selectedSampleNames) {
    if (fullyDecode) {
        return; // TODO -- annotations are broken with fully decoded data
    }

    if (keepOriginalChrCounts) {
        final int[] indexOfOriginalAlleleForNewAllele;
        final List<Allele> newAlleles = builder.getAlleles();
        final int numOriginalAlleles = originalVC.getNAlleles();

        // if the alleles already match up, we can just copy the previous list of counts
        if (numOriginalAlleles == newAlleles.size()) {
            indexOfOriginalAlleleForNewAllele = null;
        }
        // otherwise we need to parse them and select out the correct ones
        else {
            indexOfOriginalAlleleForNewAllele = new int[newAlleles.size() - 1];
            Arrays.fill(indexOfOriginalAlleleForNewAllele, -1);

            // note that we don't care about the reference allele at position 0
            for (int newI = 1; newI < newAlleles.size(); newI++) {
                final Allele newAlt = newAlleles.get(newI);
                for (int oldI = 0; oldI < numOriginalAlleles - 1; oldI++) {
                    if (newAlt.equals(originalVC.getAlternateAllele(oldI), false)) {
                        indexOfOriginalAlleleForNewAllele[newI - 1] = oldI;
                        break;
                    }
                }
            }
        }

        if (originalVC.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) {
            builder.attribute(GATKVCFConstants.ORIGINAL_AC_KEY,
                    getReorderedAttributes(originalVC.getAttribute(VCFConstants.ALLELE_COUNT_KEY), indexOfOriginalAlleleForNewAllele));
        }
        if (originalVC.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)) {
            builder.attribute(GATKVCFConstants.ORIGINAL_AF_KEY,
                    getReorderedAttributes(originalVC.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY), indexOfOriginalAlleleForNewAllele));
        }
        if (originalVC.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY)) {
            builder.attribute(GATKVCFConstants.ORIGINAL_AN_KEY, originalVC.getAttribute(VCFConstants.ALLELE_NUMBER_KEY));
        }
    }

    VariantContextUtils.calculateChromosomeCounts(builder, false);

    if (keepOriginalDepth && originalVC.hasAttribute(VCFConstants.DEPTH_KEY)) {
        builder.attribute(GATKVCFConstants.ORIGINAL_DP_KEY, originalVC.getAttribute(VCFConstants.DEPTH_KEY));
    }

    boolean sawDP = false;
    int depth = 0;
    for (final String sample : selectedSampleNames ) {
        final Genotype g = originalVC.getGenotype(sample);
        if (!g.isFiltered()) {
            if (g.hasDP()) {
                depth += g.getDP();
                sawDP = true;
            }
        }
    }

    if (sawDP) {
        builder.attribute(VCFConstants.DEPTH_KEY, depth);
    }
}
 
Example 15
Source File: VcfToVariant.java    From genomewarp with Apache License 2.0 4 votes vote down vote up
@VisibleForTesting
static List<VariantCall> getCalls(VariantContext vc, VCFHeader header) {
  List<VariantCall> toReturn = new ArrayList<>();

  for (String currSample : header.getGenotypeSamples()) {
    if (!vc.hasGenotype(currSample)) {
      continue;
    }

    Genotype currGenotype = vc.getGenotype(currSample);
    VariantCall.Builder vcBuilder = VariantCall.newBuilder();
    vcBuilder.setCallSetName(currSample);

    // Get GT info.
    final Map<Allele, Integer> alleleStrings = buildAlleleMap(vc);
    vcBuilder.addGenotype(alleleStrings.get(currGenotype.getAllele(0)));
    for (int i = 1; i < currGenotype.getPloidy(); i++) {
      vcBuilder.addGenotype(alleleStrings.get(currGenotype.getAllele(i)));
    }

    // Set phasing (not applicable to haploid).
    if (currGenotype.isPhased() && currGenotype.getPloidy() > 1) {
      vcBuilder.setPhaseset("*");
    }

    // Get rest of the genotype info.
    Map<String, ListValue> genotypeInfo = new HashMap<>();

    // Set filters
    if (currGenotype.isFiltered()) {
      genotypeInfo.put(VCFConstants.GENOTYPE_FILTER_KEY, ListValue.newBuilder()
          .addValues(Value.newBuilder().setStringValue(currGenotype.getFilters()).build())
          .build());
    }

    for (final String field : vc.calcVCFGenotypeKeys(header)) {
      // We've already handled genotype
      if (field.equals(VCFConstants.GENOTYPE_KEY)) {
        continue;
      }

      ListValue.Builder listValueBuilder = ListValue.newBuilder();
      if (field.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
        // This field has already been dealt with
        continue;
      } else {
        final IntGenotypeFieldAccessors.Accessor accessor =
            GENOTYPE_FIELD_ACCESSORS.getAccessor(field);

        if (accessor != null) {
          // The field is a default inline field.
          if (!parseInlineGenotypeFields(field, vcBuilder, listValueBuilder, accessor,
              currGenotype)) {
            continue;
          }
        } else {
          // Other field, we'll get type/other info from header.
          if (!parseOtherGenotypeFields(field, vc, listValueBuilder, currGenotype,
              header)) {
            continue;
          }
        }
      }

      genotypeInfo.put(field, listValueBuilder.build());
    }

    vcBuilder.putAllInfo(genotypeInfo);
    toReturn.add(vcBuilder.build());
  }

  return toReturn;
}
 
Example 16
Source File: StatisticsTask.java    From imputationserver with GNU Affero General Public License v3.0 4 votes vote down vote up
public void checkPloidy(List<String> samples, VariantContext snp, boolean isPhased, LineWriter chrXInfoWriter,
		HashSet<String> hapSamples) throws IOException {

	for (final String name : samples) {

		Genotype genotype = snp.getGenotype(name);

		if (hapSamples.contains(name) && genotype.getPloidy() != 1) {
			chrXInfoWriter.write(name + "\t" + snp.getContig() + ":" + snp.getStart());
			this.chrXPloidyError = true;

		}

		if (genotype.getPloidy() == 1) {
			hapSamples.add(name);
		}

	}
}
 
Example 17
Source File: VcfToAdpc.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    final List<File> inputs = IOUtil.unrollFiles(VCF, IOUtil.VCF_EXTENSIONS);
    IOUtil.assertFilesAreReadable(inputs);
    IOUtil.assertFileIsWritable(SAMPLES_FILE);
    IOUtil.assertFileIsWritable(NUM_MARKERS_FILE);
    IOUtil.assertFileIsWritable(OUTPUT);
    final List<String> sampleNames = new ArrayList<>();

    Integer numberOfLoci = null;
    try (IlluminaAdpcFileWriter adpcFileWriter = new IlluminaAdpcFileWriter(OUTPUT)) {
        for (final File inputVcf : inputs) {
            VCFFileReader vcfFileReader = new VCFFileReader(inputVcf, false);
            final VCFHeader header = vcfFileReader.getFileHeader();
            for (int sampleNumber = 0; sampleNumber < header.getNGenotypeSamples(); sampleNumber++) {
                final String sampleName = header.getGenotypeSamples().get(sampleNumber);
                sampleNames.add(sampleName);
                log.info("Processing sample: " + sampleName + " from VCF: " + inputVcf.getAbsolutePath());

                CloseableIterator<VariantContext> variants = vcfFileReader.iterator();
                int lociCount = 0;
                while (variants.hasNext()) {
                    final VariantContext context = variants.next();
                    final float gcScore = getFloatAttribute(context, InfiniumVcfFields.GC_SCORE);

                    final Genotype genotype = context.getGenotype(sampleNumber);
                    final IlluminaGenotype illuminaGenotype = getIlluminaGenotype(genotype, context);

                    final int rawXIntensity = getUnsignedShortAttributeAsInt(genotype, InfiniumVcfFields.X);
                    final int rawYIntensity = getUnsignedShortAttributeAsInt(genotype, InfiniumVcfFields.Y);

                    final Float normalizedXIntensity = getFloatAttribute(genotype, InfiniumVcfFields.NORMX);
                    final Float normalizedYIntensity = getFloatAttribute(genotype, InfiniumVcfFields.NORMY);

                    final IlluminaAdpcFileWriter.Record record = new IlluminaAdpcFileWriter.Record(rawXIntensity, rawYIntensity, normalizedXIntensity, normalizedYIntensity, gcScore, illuminaGenotype);
                    adpcFileWriter.write(record);
                    lociCount++;
                }
                if (lociCount == 0) {
                    throw new PicardException("Found no records in VCF' " + inputVcf.getAbsolutePath() + "'");
                }
                if (numberOfLoci == null) {
                    numberOfLoci = lociCount;
                } else {
                    if (lociCount != numberOfLoci) {
                        throw new PicardException("VCFs have differing number of loci");
                    }
                }
            }
        }
        writeTextToFile(SAMPLES_FILE, StringUtils.join(sampleNames, "\n"));
        writeTextToFile(NUM_MARKERS_FILE, "" + numberOfLoci);
    } catch (Exception e) {
        log.error(e);
        return 1;
    }

    return 0;
}
 
Example 18
Source File: GermlineCNVSegmentVariantComposerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Test(dataProvider = "variantCompositionSettings")
public void testVariantComposition(final int refAutosomalCopyNumber,
                                   final Set<String> allosomalContigs) {
    /* read a segments collection */
    final IntegerCopyNumberSegmentCollection collection = new IntegerCopyNumberSegmentCollection(
            IntegerCopyNumberSegmentCollectionUnitTest.TEST_INTEGER_COPY_NUMBER_SEGMENTS_FILE);
    final File segmentsOutputVCF = createTempFile("test-write-segments", ".vcf");
    final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(segmentsOutputVCF.toPath(),
            collection.getMetadata().getSequenceDictionary(), false);
    final GermlineCNVSegmentVariantComposer variantComposer = new GermlineCNVSegmentVariantComposer(
            writer, IntegerCopyNumberSegmentCollectionUnitTest.EXPECTED_SAMPLE_NAME,
            new IntegerCopyNumberState(refAutosomalCopyNumber), allosomalContigs);

    /* compose segments and assert correctness */
    for (final IntegerCopyNumberSegment segment: collection.getRecords()) {
        final VariantContext var = variantComposer.composeVariantContext(segment);
        Assert.assertEquals(var.getContig(), segment.getContig());
        Assert.assertEquals(var.getStart(), segment.getStart());
        Assert.assertEquals(var.getEnd(), segment.getEnd());

        final Genotype gt = var.getGenotype(IntegerCopyNumberSegmentCollectionUnitTest.EXPECTED_SAMPLE_NAME);

        /* assert allele correctness */
        final Allele actualAllele = gt.getAlleles().get(0);
        final int refCopyNumber = allosomalContigs.contains(segment.getContig())
                ? segment.getBaselineIntegerCopyNumberState().getCopyNumber()
                : refAutosomalCopyNumber;
        final Allele expectedAllele;
        if (segment.getCallIntegerCopyNumberState().getCopyNumber() > refCopyNumber) {
            expectedAllele = GermlineCNVSegmentVariantComposer.DUP_ALLELE;
        } else if (segment.getCallIntegerCopyNumberState().getCopyNumber() < refCopyNumber) {
            expectedAllele = GermlineCNVSegmentVariantComposer.DEL_ALLELE;
        } else {
            expectedAllele = GermlineCNVSegmentVariantComposer.REF_ALLELE;
        }
        Assert.assertEquals(actualAllele, expectedAllele);
        Assert.assertTrue(var.getAlleles().size() == (expectedAllele.equals(GermlineCNVSegmentVariantComposer.REF_ALLELE) ? 1 : 2));
        Assert.assertTrue(var.getAlleles().contains(Allele.REF_N));
        Assert.assertTrue(var.getAlleles().contains(expectedAllele));

        /* assert correctness of quality metrics */
        Assert.assertEquals(
                (long) gt.getExtendedAttribute(GermlineCNVSegmentVariantComposer.QS),
                FastMath.round(segment.getQualitySomeCalled()));
        Assert.assertEquals(
                (long) gt.getExtendedAttribute(GermlineCNVSegmentVariantComposer.QA),
                FastMath.round(segment.getQualityAllCalled()));
        Assert.assertEquals(
                (long) gt.getExtendedAttribute(GermlineCNVSegmentVariantComposer.QSS),
                FastMath.round(segment.getQualityStart()));
        Assert.assertEquals(
                (long) gt.getExtendedAttribute(GermlineCNVSegmentVariantComposer.QSE),
                FastMath.round(segment.getQualityEnd()));
    }
}