htsjdk.variant.vcf.VCFConstants Java Examples

The following examples show how to use htsjdk.variant.vcf.VCFConstants. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ReblockGVCFUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testLowQualVariantToGQ0HomRef() {
    final ReblockGVCF reblocker = new ReblockGVCF();

    reblocker.dropLowQuals = true;
    final Genotype g = makeG("sample1", LONG_REF, Allele.NON_REF_ALLELE, 200, 100, 200, 11, 0, 37);
    final VariantContext toBeNoCalled = makeDeletionVC("lowQualVar", Arrays.asList(LONG_REF, DELETION, Allele.NON_REF_ALLELE), LONG_REF.length(), g);
    final VariantContext dropped = reblocker.lowQualVariantToGQ0HomRef(toBeNoCalled, toBeNoCalled);
    Assert.assertEquals(dropped, null);

    reblocker.dropLowQuals = false;
    final VariantContext modified = reblocker.lowQualVariantToGQ0HomRef(toBeNoCalled, toBeNoCalled);
    Assert.assertTrue(modified.getAttributes().containsKey(VCFConstants.END_KEY));
    Assert.assertTrue(modified.getAttributes().get(VCFConstants.END_KEY).equals(13));
    Assert.assertTrue(modified.getReference().equals(SHORT_REF));
    Assert.assertTrue(modified.getAlternateAllele(0).equals(Allele.NON_REF_ALLELE));
    Assert.assertTrue(!modified.filtersWereApplied());
    Assert.assertTrue(modified.getLog10PError() == VariantContext.NO_LOG10_PERROR);

    //No-calls were throwing NPEs.  Now they're not.
    final Genotype g2 = makeG("sample1", Allele.NO_CALL,Allele.NO_CALL);
    final VariantContext noData = makeDeletionVC("noData", Arrays.asList(LONG_REF, DELETION, Allele.NON_REF_ALLELE), LONG_REF.length(), g2);
    final VariantContext notCrashing = reblocker.lowQualVariantToGQ0HomRef(noData, noData);
    Assert.assertTrue(notCrashing.getGenotype(0).isNoCall());
}
 
Example #2
Source File: FingerprintUtils.java    From picard with MIT License 6 votes vote down vote up
private static VariantContextWriter getVariantContextWriter(final File outputFile,
                                                            final File referenceSequenceFileName,
                                                            final String sample,
                                                            final String source,
                                                            final ReferenceSequenceFile ref) {
    final VariantContextWriter variantContextWriter = new VariantContextWriterBuilder()
            .setReferenceDictionary(ref.getSequenceDictionary())
            .setOutputFile(outputFile).build();

    final Set<VCFHeaderLine> lines = new LinkedHashSet<>();
    lines.add(new VCFHeaderLine("reference", referenceSequenceFileName.getAbsolutePath()));
    lines.add(new VCFHeaderLine("source", source));
    lines.add(new VCFHeaderLine("fileDate", new Date().toString()));

    lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_PL_KEY));
    lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS));
    lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));

    final VCFHeader header = new VCFHeader(lines, Collections.singletonList(sample));
    header.setSequenceDictionary(ref.getSequenceDictionary());
    variantContextWriter.writeHeader(header);
    return variantContextWriter;
}
 
Example #3
Source File: AssemblyBasedCallerUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Add physical phase information to the provided variant context
 *
 * @param vc   the variant context
 * @param ID   the ID to use
 * @param phaseGT the phase GT string to use
 * @return phased non-null variant context
 */
private static VariantContext phaseVC(final VariantContext vc, final String ID, final String phaseGT, final int phaseSetID) {
    final List<Genotype> phasedGenotypes = new ArrayList<>();
    for ( final Genotype g : vc.getGenotypes() ) {
        final List<Allele> alleles = g.getAlleles();
        if (phaseGT.equals(phase10) && g.isHet()) Collections.reverse(alleles); // swap the alleles if heterozygous
        final Genotype genotype = new GenotypeBuilder(g)
            .alleles(alleles)
            .phased(true)
            .attribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY, ID)
            .attribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY, phaseGT)
            .attribute(VCFConstants.PHASE_SET_KEY, phaseSetID)
            .make();
        phasedGenotypes.add(genotype);
    }
    return new VariantContextBuilder(vc).genotypes(phasedGenotypes).make();
}
 
Example #4
Source File: MappingQualityZeroUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testLikelihoods(){

    final Allele REF = Allele.create("T", true);
    final Allele ALT = Allele.create("A");

    final int refDepth= 5;
    final int altDepth= 3;
    final int refMQ = 10;
    final int altMQ = 0;
    final List<GATKRead> refReads = IntStream.range(0, refDepth).mapToObj(i -> makeRead(refMQ)).collect(Collectors.toList());
    final List<GATKRead> altReads = IntStream.range(0, altDepth).mapToObj(i -> makeRead(altMQ)).collect(Collectors.toList());
    final AlleleLikelihoods<GATKRead, Allele> likelihoods =
            ArtificialAnnotationUtils.makeLikelihoods("sample1", refReads, altReads, -1.0, -1.0, REF, ALT);

    final VariantContext vc = makeVC();
    final ReferenceContext referenceContext= null;
    final Map<String, Object> annotate = new MappingQualityZero().annotate(referenceContext, vc, likelihoods);
    Assert.assertEquals(annotate.size(), 1, "size");
    Assert.assertEquals(annotate.keySet(), Collections.singleton(VCFConstants.MAPPING_QUALITY_ZERO_KEY), "annots");
    Assert.assertEquals(annotate.get(VCFConstants.MAPPING_QUALITY_ZERO_KEY), String.valueOf(altDepth));

}
 
Example #5
Source File: AlleleSubsettingUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(dataProvider = "updatePLsSACsAndADData")
public void testUpdatePLsAndADData(final VariantContext originalVC,
                                   final VariantContext selectedVC,
                                   final List<Genotype> expectedGenotypes) {
    // initialize cache of allele anyploid indices
    for (final Genotype genotype : originalVC.getGenotypes()) {
        GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(originalVC.getNAlleles() - 1, genotype.getPloidy());
    }

    final VariantContext selectedVCwithGTs = new VariantContextBuilder(selectedVC).genotypes(originalVC.getGenotypes()).make();

    final GenotypesContext oldGs = selectedVCwithGTs.getGenotypes();
    final GenotypesContext actual = selectedVCwithGTs.getNAlleles() == originalVC.getNAlleles() ? oldGs :
                                    AlleleSubsettingUtils.subsetAlleles(oldGs, 0, originalVC.getAlleles(),
                                                                        selectedVCwithGTs.getAlleles(),
                                                                        GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES,
                                                                        originalVC.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));

    Assert.assertEquals(actual.size(), expectedGenotypes.size());
    for ( final Genotype expected : expectedGenotypes ) {
        final Genotype actualGT = actual.get(expected.getSampleName());
        Assert.assertNotNull(actualGT);
        VariantContextTestUtils.assertGenotypesAreEqual(actualGT, expected);
    }
}
 
Example #6
Source File: VariantEvaluationContextBuilder.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void calculateAFs(final Iterable<Genotype> genotypes) {
    final int[] truthAC = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
    final int[] callsAC = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
    for (final Genotype genotype : genotypes) {
        final List<Allele> alleles = genotype.getAlleles();
        if (alleles.size() > 1) {
            throw new GATKException("unexpected CNV genotype ploidy: " + alleles.size());
        } else if (!alleles.isEmpty() && alleles.get(0).isCalled()) {
            final int index = CopyNumberTriStateAllele.valueOf(alleles.get(0)).index();
            callsAC[index]++;
        }
        final String truthGT = String.valueOf(genotype.getExtendedAttribute(VariantEvaluationContext.TRUTH_GENOTYPE_KEY, VCFConstants.MISSING_VALUE_v4));
        final int truthAlleleIndex = truthGT.equals(VCFConstants.MISSING_VALUE_v4) ? -1 : Integer.parseInt(truthGT);
        if (truthAlleleIndex >= 0) {
            final List<Allele> contextAlleles = getAlleles();
            if (truthAlleleIndex >= contextAlleles.size()) {
                throw new GATKException("unexpected CNV truth genotype makes reference to a non-existent allele: " + truthGT);
            }
            truthAC[CopyNumberTriStateAllele.valueOf(contextAlleles.get(truthAlleleIndex)).index()]++;
        }
        genotype.getAllele(0);
    }
    this.truthAC = truthAC;
    this.callsAC = callsAC;
}
 
Example #7
Source File: VariantDataManager.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
public void writeOutRecalibrationTable(final VariantContextWriter recalWriter, final SAMSequenceDictionary seqDictionary) {
    // we need to sort in coordinate order in order to produce a valid VCF
    Collections.sort( data, VariantDatum.getComparator(seqDictionary) );

    // create dummy alleles to be used
    List<Allele> alleles = Arrays.asList(Allele.create("N", true), Allele.create("<VQSR>", false));

    for( final VariantDatum datum : data ) {
        if (VRAC.useASannotations)
            alleles = Arrays.asList(datum.referenceAllele, datum.alternateAllele); //use the alleles to distinguish between multiallelics in AS mode
        VariantContextBuilder builder = new VariantContextBuilder("VQSR", datum.loc.getContig(), datum.loc.getStart(), datum.loc.getEnd(), alleles);
        builder.attribute(VCFConstants.END_KEY, datum.loc.getEnd());
        builder.attribute(GATKVCFConstants.VQS_LOD_KEY, String.format("%.4f", datum.lod));
        builder.attribute(GATKVCFConstants.CULPRIT_KEY, (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL"));

        if ( datum.atTrainingSite ) builder.attribute(GATKVCFConstants.POSITIVE_LABEL_KEY, true);
        if ( datum.atAntiTrainingSite ) builder.attribute(GATKVCFConstants.NEGATIVE_LABEL_KEY, true);

        recalWriter.add(builder.make());
    }
}
 
Example #8
Source File: OrientationBiasFiltererUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testAnnotateVariantContextWithPreprocessingValues() {
    final FeatureDataSource<VariantContext> featureDataSource = new FeatureDataSource<>(new File(smallM2Vcf));
    SortedSet<Transition> relevantTransitions = new TreeSet<>();
    relevantTransitions.add(Transition.transitionOf('G', 'T'));

    final Map<Transition, Double> preAdapterQFakeScoreMap = new HashMap<>();
    final double amGTPreAdapterQ = 20.0;
    preAdapterQFakeScoreMap.put(Transition.transitionOf('G', 'T'), amGTPreAdapterQ);  // preAdapterQ suppression will do nothing.

    for (final VariantContext vc : featureDataSource) {
        final VariantContext updatedVariantContext = OrientationBiasFilterer.annotateVariantContextWithPreprocessingValues(vc, relevantTransitions, preAdapterQFakeScoreMap);

        final Genotype genotypeTumor = updatedVariantContext.getGenotype("TUMOR");
        final Genotype genotypeNormal = updatedVariantContext.getGenotype("NORMAL");

        Assert.assertNotEquals(genotypeTumor.getExtendedAttribute(OrientationBiasFilterConstants.FOB, VCFConstants.EMPTY_ALLELE), VCFConstants.EMPTY_ALLELE);
        Assert.assertNotEquals(genotypeTumor.getExtendedAttribute(OrientationBiasFilterConstants.P_ARTIFACT_FIELD_NAME, VCFConstants.EMPTY_ALLELE), VCFConstants.EMPTY_ALLELE);

        assertArtifact(amGTPreAdapterQ, genotypeTumor, Transition.transitionOf('G', 'T'));

        // The NORMAL is always ref/ref in the example file.
        assertNormal(genotypeNormal);
    }
}
 
Example #9
Source File: PosteriorProbabilitiesUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testCalculatePosteriorSamplePlusExternal() {
    final VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
            makeG("s2",Aref,T,18,0,24),
            makeG("s3",Aref,T,22,0,12));
    final List<VariantContext> supplTest1 = new ArrayList<>(3);
    supplTest1.add(new VariantContextBuilder(makeVC("2", Arrays.asList(Aref,T))).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,2).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
    supplTest1.add(new VariantContextBuilder(makeVC("3", Arrays.asList(Aref,T))).attribute(VCFConstants.ALLELE_COUNT_KEY,4).attribute(VCFConstants.ALLELE_NUMBER_KEY,22).make());
    supplTest1.add(makeVC("4", Arrays.asList(Aref,T),
            makeG("s_1",T,T),
            makeG("s_2",Aref,T)));
    final VariantContext test1result = PosteriorProbabilitiesUtils.calculatePosteriorProbs(testOverlappingBase,supplTest1, 0, defaultOptions);
    // the counts here are ref=30, alt=14
    final Genotype test1exp1 = makeGwithLog10GLs("t1",T,T,new double[]{-3.370985, -1.415172, -0.01721766});
    final Genotype test1exp2 = makeGwithLog10GLs("t2",Aref,T,new double[]{-1.763792, -0.007978791, -3.010024});
    final Genotype test1exp3 = makeGwithLog10GLs("t3",Aref,T,new double[]{-2.165587, -0.009773643, -1.811819});
    Assert.assertEquals(arraysEq(test1exp1.getPL(),_mleparse((List<Integer>) test1result.getGenotype(0).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");
    Assert.assertEquals(arraysEq(test1exp2.getPL(),_mleparse((List<Integer>) test1result.getGenotype(1).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");
    Assert.assertEquals(arraysEq(test1exp3.getPL(),_mleparse((List<Integer>) test1result.getGenotype(2).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");

    final VariantContext testNonOverlapping = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,3,1,0));
    final List<VariantContext> other = Arrays.asList(makeVC("2", Arrays.asList(Aref,C),makeG("s2",C,C,10,2,0)));
    final VariantContext test2result = PosteriorProbabilitiesUtils.calculatePosteriorProbs(testNonOverlapping,other,0, defaultOptions);
    final Genotype test2exp1 = makeGwithLog10GLs("SGV",T,T,new double[]{-4.078345, -3.276502, -0.0002661066});
    Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List<Integer>) test2result.getGenotype(0).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");
}
 
Example #10
Source File: RMSMappingQualityUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testGetNumReads(){
    final Allele refAllele = Allele.create("A", true);
    final Allele altAllele = Allele.create("T");
    final Genotype homRefDP = new GenotypeBuilder("sample1", Arrays.asList(refAllele, refAllele)).DP(5).make();
    final Genotype homRefMinDP = new  GenotypeBuilder("sample2", Arrays.asList(refAllele, refAllele))
            .DP(11)
            .attribute(GATKVCFConstants.MIN_DP_FORMAT_KEY, 10).make();
    final Genotype hetAlt = new GenotypeBuilder("sample3", Arrays.asList(refAllele, altAllele)).DP(100).make();
    final VariantContext vc = new VariantContextBuilder().alleles(Arrays.asList(refAllele, altAllele))
            .chr("1")
            .start(15L)
            .stop(15L)
            .genotypes(homRefDP, homRefMinDP, hetAlt)
            .attribute(VCFConstants.DEPTH_KEY, 5 + 10 + 100)
            .make();

    Assert.assertEquals(RMSMappingQuality.getNumOfReads(vc, null), 115 - 5 - 10);
}
 
Example #11
Source File: PosteriorProbabilitiesUtilsUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testCalculatePosteriorSamplePlusExternal() {
    final VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
            makeG("s2",Aref,T,18,0,24),
            makeG("s3",Aref,T,22,0,12));
    final List<VariantContext> supplTest1 = new ArrayList<>(3);
    supplTest1.add(new VariantContextBuilder(makeVC("2", Arrays.asList(Aref,T))).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,2).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
    supplTest1.add(new VariantContextBuilder(makeVC("3", Arrays.asList(Aref,T))).attribute(VCFConstants.ALLELE_COUNT_KEY,4).attribute(VCFConstants.ALLELE_NUMBER_KEY,22).make());
    supplTest1.add(makeVC("4", Arrays.asList(Aref,T),
            makeG("s_1",T,T),
            makeG("s_2",Aref,T)));
    final VariantContext test1result = PosteriorProbabilitiesUtils.calculatePosteriorProbs(testOverlappingBase,supplTest1,0,0.001,true,false, false);
    // the counts here are ref=30, alt=14
    final Genotype test1exp1 = makeGwithPLs("t1",T,T,new double[]{-3.370985, -1.415172, -0.01721766});
    final Genotype test1exp2 = makeGwithPLs("t2",Aref,T,new double[]{-1.763792, -0.007978791, -3.010024});
    final Genotype test1exp3 = makeGwithPLs("t3",Aref,T,new double[]{-2.165587, -0.009773643, -1.811819});
    Assert.assertEquals(arraysEq(test1exp1.getPL(),_mleparse((List<Integer>) test1result.getGenotype(0).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");
    Assert.assertEquals(arraysEq(test1exp2.getPL(),_mleparse((List<Integer>) test1result.getGenotype(1).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");
    Assert.assertEquals(arraysEq(test1exp3.getPL(),_mleparse((List<Integer>) test1result.getGenotype(2).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");

    final VariantContext testNonOverlapping = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,3,1,0));
    final List<VariantContext> other = Arrays.asList(makeVC("2", Arrays.asList(Aref,C),makeG("s2",C,C,10,2,0)));
    final VariantContext test2result = PosteriorProbabilitiesUtils.calculatePosteriorProbs(testNonOverlapping,other,0,0.001,true,false,false);
    final Genotype test2exp1 = makeGwithPLs("SGV",T,T,new double[]{-4.078345, -3.276502, -0.0002661066});
    Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List<Integer>) test2result.getGenotype(0).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");
}
 
Example #12
Source File: VcfToVariant.java    From genomewarp with Apache License 2.0 6 votes vote down vote up
private static boolean parseInlineGenotypeFields(String field, VariantCall.Builder vcBuilder,
    ListValue.Builder lvBuilder, IntGenotypeFieldAccessors.Accessor accessor, Genotype g) {

  final int[] intValues = accessor.getValues(g);
  if (intValues == null || intValues.length == 0) {
    return false;
  }

  if (field.equals(VCFConstants.GENOTYPE_PL_KEY)) {
    // HTSJDK folds GL's into PL's. We only use PL's to store genotype likelihood.
    for (int i = 0; i < intValues.length; i++) {
      // We add 0.0 to remove the possiblity of getting -0.0.
      vcBuilder.addGenotypeLikelihood(-(double) intValues[i] / 10.0 + 0.0);
    }
    return false;
  }

  for (int i = 0; i < intValues.length; i++) {
    lvBuilder.addValues(Value.newBuilder().setNumberValue(intValues[i]));
  }
  return true;
}
 
Example #13
Source File: ReferenceConfidenceVariantContextMerger.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * lookup the depth from the VC DP field or calculate by summing the depths of the genotypes
 */
protected static int calculateVCDepth(VariantContext vc) {
    if ( vc.hasAttribute(VCFConstants.DEPTH_KEY) ) {
        return vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);
    } else { // handle the gVCF case from the HaplotypeCaller
        return vc.getGenotypes().stream()
                .mapToInt(ReferenceConfidenceVariantContextMerger::getBestDepthValue)
                .sum();
    }
}
 
Example #14
Source File: PosteriorProbabilitiesUtilsUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testMultipleMLEACvalues() {
    final VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
            makeG("s2",Aref,T,18,0,24),
            makeG("s3",Aref,T,22,0,12));
    final List<VariantContext> supplTest1 = new ArrayList<>(1);
    supplTest1.add(new VariantContextBuilder(makeVC("2", Arrays.asList(Aref,T,C))).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY, Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
    final VariantContext test1result = PosteriorProbabilitiesUtils.calculatePosteriorProbs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
 
Example #15
Source File: VariantRecalibrationUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Add the standard VCF header lines used with VQSR.
 * @param hInfo updated set of VCFHeaderLines
 */
protected static void addVQSRStandardHeaderLines(final Set<VCFHeaderLine> hInfo) {
    hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
    hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.VQS_LOD_KEY));
    hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.CULPRIT_KEY));
    hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.POSITIVE_LABEL_KEY));
    hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.NEGATIVE_LABEL_KEY));
    hInfo.add(GATKVCFHeaderLines.getFilterLine(VCFConstants.PASSES_FILTERS_v4));
}
 
Example #16
Source File: CombineGVCFs.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Combine a list of reference block VariantContexts.
 * We can't use GATKVariantContextUtils.simpleMerge() because it is just too slow for this sort of thing.
 *
 * @param vcs   the variant contexts to merge
 * @param end   the end of this block (inclusive)
 * @return a new merged VariantContext
 */
private VariantContext referenceBlockMerge(final List<VariantContext> vcs, final int end) {

    final VariantContext first = vcs.get(0);

    // ref allele and start
    final Allele refAllele;
    final int start;
    if ( prevPos == null || !prevPos.getContig().equals(first.getContig()) || first.getStart() >= prevPos.getStart() + 1) {
        start = first.getStart();
        refAllele = first.getReference();
    } else {
        start = prevPos.getStart() + 1;
        refAllele = Allele.create(refAfterPrevPos, true);
    }

    // attributes
    final Map<String, Object> attrs = new HashMap<>(1);
    if ( !useBpResolution && end != start ) {
        attrs.put(VCFConstants.END_KEY, Integer.toString(end));
    }

    // genotypes
    final GenotypesContext genotypes = GenotypesContext.create();
    for (final VariantContext vc : vcs) {
        for (final Genotype g : vc.getGenotypes()) {
            genotypes.add(new GenotypeBuilder(g).alleles(GATKVariantContextUtils.noCallAlleles(g.getPloidy())).make());
        }
    }
    return new VariantContextBuilder("", first.getContig(), start, end, Arrays.asList(refAllele, Allele.NON_REF_ALLELE)).attributes(attrs).genotypes(genotypes).make();
}
 
Example #17
Source File: CombineGVCFs.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private VariantContextWriter getVCFWriter() {
    final SortedSet<String> samples = getSamplesForVariants();

    final VCFHeader inputVCFHeader = new VCFHeader(getHeaderForVariants().getMetaDataInInputOrder(), samples);

    final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputVCFHeader.getMetaDataInInputOrder());
    headerLines.addAll(getDefaultToolVCFHeaderLines());

    headerLines.addAll(annotationEngine.getVCFAnnotationDescriptions());

    // add headers for annotations added by this tool
    headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));   // needed for gVCFs without DP tags
    if ( dbsnp.dbsnp != null  ) {
        VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY);
    }

    if (somaticInput) {
        //single-sample M2 variant filter status will get moved to genotype filter
        headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));

        if (!dropSomaticFilteringAnnotations) {
            //standard M2 INFO annotations for filtering will get moved to FORMAT field
            for (final String key : Mutect2FilteringEngine.STANDARD_MUTECT_INFO_FIELDS_FOR_FILTERING) {
                headerLines.add(GATKVCFHeaderLines.getEquivalentFormatHeaderLine(key));
            }
        }
    }

    VariantContextWriter writer = createVCFWriter(outputFile);

    final Set<String> sampleNameSet = new IndexedSampleList(samples).asSetOfSamples();
    final VCFHeader vcfHeader = new VCFHeader(headerLines, new TreeSet<>(sampleNameSet));
    writer.writeHeader(vcfHeader);

    return writer;
}
 
Example #18
Source File: AnnotatedVariantProducer.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@VisibleForTesting
static Map<String, Object> getAssemblyEvidenceRelatedAnnotations(final Iterable<SimpleChimera> splitAlignmentEvidence) {

    final List<ChimericContigAlignmentEvidenceAnnotations> annotations =
            Utils.stream(splitAlignmentEvidence)
                    .sorted(Comparator.comparing(evidence -> evidence.sourceContigName))
                    .map(ChimericContigAlignmentEvidenceAnnotations::new)
                    .collect(Collectors.toList());

    final Map<String, Object> attributeMap = new HashMap<>();
    attributeMap.put(GATKSVVCFConstants.TOTAL_MAPPINGS,    annotations.size());
    attributeMap.put(GATKSVVCFConstants.HQ_MAPPINGS,       annotations.stream().filter(annotation -> annotation.minMQ == DiscoverVariantsFromContigAlignmentsSparkArgumentCollection.CHIMERIC_ALIGNMENTS_HIGHMQ_THRESHOLD).count());
    attributeMap.put(GATKSVVCFConstants.MAPPING_QUALITIES, annotations.stream().map(annotation -> String.valueOf(annotation.minMQ)).collect(Collectors.joining(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)));
    attributeMap.put(GATKSVVCFConstants.ALIGN_LENGTHS,     annotations.stream().map(annotation -> String.valueOf(annotation.minAL)).collect(Collectors.joining(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)));
    attributeMap.put(GATKSVVCFConstants.MAX_ALIGN_LENGTH,  annotations.stream().map(annotation -> annotation.minAL).max(Comparator.naturalOrder()).orElse(0));
    attributeMap.put(GATKSVVCFConstants.CONTIG_NAMES,      annotations.stream().map(annotation -> annotation.sourceContigName).collect(Collectors.joining(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)));

    final List<String> insertionMappings = annotations.stream().map(annotation -> annotation.insSeqMappings).flatMap(List::stream).sorted().collect(Collectors.toList());

    if (!insertionMappings.isEmpty()) {
        attributeMap.put(GATKSVVCFConstants.INSERTED_SEQUENCE_MAPPINGS, insertionMappings.stream().collect(Collectors.joining(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)));
    }

    final List<String> goodCanonicalMapping = annotations.stream().map(annotation -> annotation.goodNonCanonicalMappingSATag)
            .filter(s -> ! s.equals(AssemblyContigWithFineTunedAlignments.NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME)).collect(Collectors.toList());
    if (!goodCanonicalMapping.isEmpty()) {
        attributeMap.put(GATKSVVCFConstants.CTG_GOOD_NONCANONICAL_MAPPING, goodCanonicalMapping.stream().collect(Collectors.joining(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)));
    }

    return attributeMap;
}
 
Example #19
Source File: ValidateVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private Set<String> getRSIDs(FeatureContext featureContext) {
    Set<String> rsIDs = new LinkedHashSet<>();
    for (VariantContext rsID : featureContext.getValues(dbsnp.dbsnp)) {
        rsIDs.addAll(Arrays.asList(rsID.getID().split(VCFConstants.ID_FIELD_SEPARATOR)));
    }
    return rsIDs;
}
 
Example #20
Source File: PosteriorProbabilitiesUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Check the formatting on the Object returned by a call to VariantContext::getAttribute() and parse appropriately
 * @param integerListContainingVCField - Object returned by a call to VariantContext::getAttribute()
 * @return - array of ints
 *
 * //Note: if we're working with a integerListContainingVCField that's read directly out of the file it will be a String but
 * if it gets pulled from a VariantContext object built elsewhere in the code it will be an Integer or a List,
 */
@SuppressWarnings("unchecked")
public static int[] extractInts(final Object integerListContainingVCField) {
    List<Integer> mleList = null;
    if ( integerListContainingVCField instanceof List) {
        if ( ((List) integerListContainingVCField).get(0) instanceof String) {
            mleList = new ArrayList<>(((List) integerListContainingVCField).size());
            for ( final Object s : ((List)integerListContainingVCField)) {
                mleList.add(Integer.parseInt((String) s));
            }
        } else {
            mleList = (List<Integer>) integerListContainingVCField;
        }
    } else if ( integerListContainingVCField instanceof Integer) {
        mleList = Arrays.asList((Integer) integerListContainingVCField);
    } else if ( integerListContainingVCField instanceof String) {
        mleList = Arrays.asList(Integer.parseInt((String)integerListContainingVCField));
    }
    Utils.nonNull( mleList, () -> String.format("VCF does not have properly formatted %s or %s.",
                GATKVCFConstants.MLE_ALLELE_COUNT_KEY, VCFConstants.ALLELE_COUNT_KEY));

    final int[] mle = new int[mleList.size()];

    if ( ! ( mleList.get(0) instanceof Integer) ) {
        throw new IllegalStateException("BUG: The AC values should be an Integer, but was " + mleList.get(0).getClass().getCanonicalName());
    }

    for ( int idx = 0; idx < mle.length; idx++) {
        mle[idx] = mleList.get(idx);
    }

    return mle;
}
 
Example #21
Source File: VariantContextTestUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public static VariantContext makeVariantContext(VariantContextBuilder vcb, List<Allele> alleles, int gq, int[] PPs) {
    final GenotypeBuilder gb = new GenotypeBuilder(SAMPLE_NAME, alleles);
    gb.DP(10);
    gb.AD(new int[]{1, 2});
    gb.PL(new int[]{0, gq, 20+gq});
    gb.attribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY, Utils.listFromPrimitives(PPs));
    gb.GQ(MathUtils.secondSmallestMinusSmallest(PPs, gq));
    return vcb.genotypes(gb.make()).id(VCFConstants.EMPTY_ID_FIELD).make();
}
 
Example #22
Source File: PosteriorProbabilitiesUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testMultipleMLEACvalues() {
    final VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
            makeG("s2",Aref,T,18,0,24),
            makeG("s3",Aref,T,22,0,12));
    final List<VariantContext> supplTest1 = new ArrayList<>(1);
    supplTest1.add(new VariantContextBuilder(makeVC("2", Arrays.asList(Aref,T,C))).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY, Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
    final VariantContext test1result = PosteriorProbabilitiesUtils.calculatePosteriorProbs(testOverlappingBase,supplTest1,0,defaultOptions);
}
 
Example #23
Source File: LiftoverUtils.java    From picard with MIT License 5 votes vote down vote up
protected static VariantContextBuilder reverseComplementVariantContext(final VariantContext source, final Interval target, final ReferenceSequence refSeq) {
    if (target.isPositiveStrand()) {
        throw new IllegalArgumentException("This should only be called for negative strand liftovers");
    }
    final int start = target.getStart();
    final int stop = target.getEnd();

    final List<Allele> origAlleles = new ArrayList<>(source.getAlleles());
    final VariantContextBuilder vcb = new VariantContextBuilder(source);

    vcb.rmAttribute(VCFConstants.END_KEY)
            .chr(target.getContig())
            .start(start)
            .stop(stop)
            .alleles(reverseComplementAlleles(origAlleles));

    if (isIndelForLiftover(source)) {
        // check that the reverse complemented bases match the new reference
        if (referenceAlleleDiffersFromReferenceForIndel(vcb.getAlleles(), refSeq, start, stop)) {
            return null;
        }
        leftAlignVariant(vcb, start, stop, vcb.getAlleles(), refSeq);
    }

    vcb.genotypes(fixGenotypes(source.getGenotypes(), origAlleles, vcb.getAlleles()));

    return vcb;
}
 
Example #24
Source File: Mutect2Engine.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public void writeHeader(final VariantContextWriter vcfWriter, final Set<VCFHeaderLine> defaultToolHeaderLines) {
    final Set<VCFHeaderLine> headerInfo = new HashSet<>();
    headerInfo.add(new VCFHeaderLine("MutectVersion", MUTECT_VERSION));
    headerInfo.add(new VCFHeaderLine(FilterMutectCalls.FILTERING_STATUS_VCF_KEY, "Warning: unfiltered Mutect 2 calls.  Please run " + FilterMutectCalls.class.getSimpleName() + " to remove false positives."));
    headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions(false));
    headerInfo.addAll(defaultToolHeaderLines);
    STANDARD_MUTECT_INFO_FIELDS.stream().map(GATKVCFHeaderLines::getInfoLine).forEach(headerInfo::add);

    VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true,
            VCFConstants.GENOTYPE_KEY,
            VCFConstants.GENOTYPE_ALLELE_DEPTHS,
            VCFConstants.GENOTYPE_QUALITY_KEY,
            VCFConstants.DEPTH_KEY,
            VCFConstants.GENOTYPE_PL_KEY);
    headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.ALLELE_FRACTION_KEY));
    headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY));
    headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY));
    headerInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.PHASE_SET_KEY));

    ReadUtils.getSamplesFromHeader(header).stream().filter(this::isTumorSample)
            .forEach(s -> headerInfo.add(new VCFHeaderLine(TUMOR_SAMPLE_KEY_IN_VCF_HEADER, s)));

    normalSamples.forEach(sample -> headerInfo.add(new VCFHeaderLine(NORMAL_SAMPLE_KEY_IN_VCF_HEADER, sample)));
    if (emitReferenceConfidence()) {
        headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
        headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY));
    }

    final VCFHeader vcfHeader = new VCFHeader(headerInfo, samplesList.asListOfSamples());
    vcfHeader.setSequenceDictionary(header.getSequenceDictionary());
    vcfWriter.writeHeader(vcfHeader);
}
 
Example #25
Source File: ReblockGVCFUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testChangeCallToGQ0HomRef() {
    final ReblockGVCF reblocker = new ReblockGVCF();

    final Genotype g = makeG("sample1", LONG_REF, Allele.NON_REF_ALLELE, 200, 100, 200, 11, 0, 37);
    final VariantContext toBeNoCalled = makeDeletionVC("lowQualVar", Arrays.asList(LONG_REF, DELETION, Allele.NON_REF_ALLELE), LONG_REF.length(), g);
    final Map<String, Object> noAttributesMap = new HashMap<>();
    final GenotypeBuilder noCalled = reblocker.changeCallToGQ0HomRef(toBeNoCalled, noAttributesMap);
    final Genotype newG = noCalled.make();
    Assert.assertTrue(noAttributesMap.containsKey(VCFConstants.END_KEY));
    Assert.assertTrue(noAttributesMap.get(VCFConstants.END_KEY).equals(13));
    Assert.assertTrue(newG.getAllele(0).equals(SHORT_REF));
    Assert.assertTrue(newG.getAllele(1).equals(SHORT_REF));
    Assert.assertTrue(!newG.hasAD());
}
 
Example #26
Source File: PosteriorProbabilitiesUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testInputIndel() {
    final VariantContext inputIndel = makeVC("1", Arrays.asList(Aref, ATC), makeG("s1",ATC,ATC,40,20,0),
            makeG("s2",Aref,ATC,18,0,24),
            makeG("s3",Aref,ATC,22,0,12),
            makeG("s5",Aref,Aref,0,15,90),
            makeG("s6",Aref,ATC,40,0,10),
            makeG("s7",Aref,Aref,0,5,8),
            makeG("s8",Aref,ATC,20,0,10),
            makeG("s9",Aref,Aref,0,15,90),
            makeG("s10",Aref,ATC,40,0,10),
            makeG("s11",Aref,Aref,0,5,8),
            makeG("s12",ATC,ATC,20,0,10));
    final List<VariantContext> supplTest1 = new ArrayList<>(1);
    supplTest1.add(new VariantContextBuilder(makeVC("2", Arrays.asList(Aref,T,C))).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY, Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
    final VariantContext test1result = PosteriorProbabilitiesUtils.calculatePosteriorProbs(inputIndel,supplTest1,0,defaultOptions);

    final int[] sample0_PPs = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY));
    final int[] sample0_expectedPPs = {36,16,0};
    Assert.assertEquals(sample0_PPs,sample0_expectedPPs);
    final int[] sample1_PPs = _mleparse( (List<Integer>)test1result.getGenotype(1).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY));
    final int[] sample1_expectedPPs = {19,0,28};
    Assert.assertEquals(sample1_PPs,sample1_expectedPPs);
    final int[] sample2_PPs = _mleparse( (List<Integer>)test1result.getGenotype(2).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY));
    final int[] sample2_expectedPPs = {23,0,16};
    Assert.assertEquals(sample2_PPs,sample2_expectedPPs);
}
 
Example #27
Source File: PosteriorProbabilitiesUtilsUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testMLEACgreaterThanAN() {
    final VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
            makeG("s2",Aref,T,18,0,24),
            makeG("s3",Aref,T,22,0,12));
    final List<VariantContext> supplTest1 = new ArrayList<>(1);
    supplTest1.add(new VariantContextBuilder(makeVC("2", Arrays.asList(Aref,T))).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
    final VariantContext test1result = PosteriorProbabilitiesUtils.calculatePosteriorProbs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
 
Example #28
Source File: ApplyVQSRUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public final void testGenerateFilterString() {
    final ApplyVQSR ar = new ApplyVQSR();
    ar.VQSLOD_CUTOFF = 0.0;
    Assert.assertTrue(ar.generateFilterString(5.0).equals(VCFConstants.PASSES_FILTERS_v4));
    Assert.assertTrue(ar.generateFilterString(-5.0).equals(ApplyVQSR.LOW_VQSLOD_FILTER_NAME));
}
 
Example #29
Source File: PosteriorProbabilitiesUtilsUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(expectedExceptions = {UserException.class})
public void testWrongNumberACvalues() {
    final VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
            makeG("s2",Aref,T,18,0,24),
            makeG("s3",Aref,T,22,0,12));
    final List<VariantContext> supplTest1 = new ArrayList<>(1);
    supplTest1.add(new VariantContextBuilder(makeVC("2", Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());

    final VariantContext test1result = PosteriorProbabilitiesUtils.calculatePosteriorProbs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
 
Example #30
Source File: PosteriorProbabilitiesUtilsUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(expectedExceptions = {UserException.class})
public void testWrongNumberMLEACvalues() {
    final VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
            makeG("s2",Aref,T,18,0,24),
            makeG("s3",Aref,T,22,0,12));
    final List<VariantContext> supplTest1 = new ArrayList<>(1);
    supplTest1.add(new VariantContextBuilder(makeVC("2", Arrays.asList(Aref,T,C))).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
    final VariantContext test1result = PosteriorProbabilitiesUtils.calculatePosteriorProbs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}