htsjdk.variant.variantcontext.VariantContext Java Examples

The following examples show how to use htsjdk.variant.variantcontext.VariantContext. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: StructuralVariationDiscoveryPipelineSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private static void experimentalInterpretation(final JavaSparkContext ctx,
                                               final FindBreakpointEvidenceSpark.AssembledEvidenceResults assembledEvidenceResults,
                                               final SvDiscoveryInputMetaData svDiscoveryInputMetaData) {

    final JavaRDD<GATKRead> assemblyRawAlignments = getContigRawAlignments(ctx, assembledEvidenceResults, svDiscoveryInputMetaData);

    final String updatedOutputPath = svDiscoveryInputMetaData.getOutputPath() + "experimentalInterpretation_";
    svDiscoveryInputMetaData.updateOutputPath(updatedOutputPath);

    SvDiscoverFromLocalAssemblyContigAlignmentsSpark.AssemblyContigsClassifiedByAlignmentSignatures contigsByPossibleRawTypes
            = SvDiscoverFromLocalAssemblyContigAlignmentsSpark.preprocess(svDiscoveryInputMetaData, assemblyRawAlignments);

    final List<VariantContext> variants = SvDiscoverFromLocalAssemblyContigAlignmentsSpark
            .dispatchJobs(ctx, contigsByPossibleRawTypes, svDiscoveryInputMetaData, assemblyRawAlignments, true);
    contigsByPossibleRawTypes.unpersist();

    SvDiscoverFromLocalAssemblyContigAlignmentsSpark.filterAndWriteMergedVCF(updatedOutputPath, variants, svDiscoveryInputMetaData);
}
 
Example #2
Source File: IndelSize.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
public List<Object> getRelevantStates(ReferenceContext referenceContext, ReadsContext readsContext, FeatureContext featureContext, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName, String FamilyName) {
    if (eval != null && eval.isIndel() && eval.isBiallelic()) {
        try {
            int eventLength = 0;
            if ( eval.isSimpleInsertion() ) {
                eventLength = eval.getAlternateAllele(0).length();
            } else if ( eval.isSimpleDeletion() ) {
                eventLength = -eval.getReference().length();
            }

            if (eventLength > MAX_INDEL_SIZE)
                eventLength = MAX_INDEL_SIZE;
            else if (eventLength < -MAX_INDEL_SIZE)
                eventLength = -MAX_INDEL_SIZE;

            return Collections.singletonList((Object)eventLength);
        } catch (Exception e) {
            return Collections.emptyList();
        }
    }

    return Collections.emptyList();
}
 
Example #3
Source File: ReblockGVCFIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testMQHeadersAreUpdated() throws Exception {
    final File output = createTempFile("reblockedgvcf", ".vcf");
    final ArgumentsBuilder args = new ArgumentsBuilder();
    args.add("V", getToolTestDataDir() + "justHeader.g.vcf")
            .addOutput(output);
    runCommandLine(args);

    Pair<VCFHeader, List<VariantContext>> actual = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath());
    VCFHeader header = actual.getLeft();
    List<VCFInfoHeaderLine> infoLines = new ArrayList<>(header.getInfoHeaderLines());
    //check all the headers in case there's one old and one updated
    for (final VCFInfoHeaderLine line : infoLines) {
        if (line.getID().equals(GATKVCFConstants.RAW_RMS_MAPPING_QUALITY_DEPRECATED)) {
            Assert.assertTrue(line.getType().equals(VCFHeaderLineType.Float));
            Assert.assertTrue(line.getDescription().contains("deprecated"));
        } else if (line.getID().equals(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED)) {
            Assert.assertTrue(line.getDescription().contains("deprecated"));
        }
    }
}
 
Example #4
Source File: AS_RankSumTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private String makeReducedAnnotationString(final VariantContext vc, final Map<Allele,Double> perAltRankSumResults) {
    final StringBuilder annotationString = new StringBuilder();
    for (final Allele a : vc.getAlternateAlleles()) {
        if (annotationString.length() != 0) {
            annotationString.append(AnnotationUtils.ALLELE_SPECIFIC_REDUCED_DELIM);
        }
        if (!perAltRankSumResults.containsKey(a)) {
            logger.warn("VC allele not found in annotation alleles -- maybe there was trimming?  Allele " + a.getDisplayString() + " will be marked as missing.");
            annotationString.append(VCFConstants.MISSING_VALUE_v4); //add the missing value or the array size and indexes won't check out
        } else {
            final Double numericAlleleValue = perAltRankSumResults.get(a);
            final String perAlleleValue = numericAlleleValue != null ? String.format("%.3f", numericAlleleValue) : VCFConstants.MISSING_VALUE_v4;
            annotationString.append(perAlleleValue);
        }
    }
    return annotationString.toString();
}
 
Example #5
Source File: GatherVcfsCloudIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@SuppressWarnings({"unchecked"})
private void assertGatherProducesCorrectVariants(GatherVcfsCloud.GatherType gatherType, File expected, List<File> inputs) throws IOException {
    final File output = createTempFile("gathered", ".vcf.gz");
    final ArgumentsBuilder args = new ArgumentsBuilder();
    inputs.forEach(args::addInput);
    args.addOutput(output)
            .add(GatherVcfsCloud.GATHER_TYPE_LONG_NAME, gatherType.toString());
    runCommandLine(args);

    try (final AbstractFeatureReader<VariantContext, ?> actualReader = AbstractFeatureReader.getFeatureReader(output.getAbsolutePath(), null, new VCFCodec(), false, Function.identity(), Function.identity());
         final AbstractFeatureReader<VariantContext, ?> expectedReader = AbstractFeatureReader.getFeatureReader(expected.getAbsolutePath(), null, new VCFCodec(), false, Function.identity(), Function.identity())) {

        final List<VariantContext> actualVariants = StreamSupport.stream(Spliterators.spliteratorUnknownSize(actualReader.iterator(),Spliterator.ORDERED), false).collect(Collectors.toList());
        final List<VariantContext> expectedVariants = StreamSupport.stream(Spliterators.spliteratorUnknownSize(expectedReader.iterator(),Spliterator.ORDERED), false).collect(Collectors.toList());
        VariantContextTestUtils.assertEqualVariants(actualVariants, expectedVariants);

        Assert.assertEquals(((VCFHeader) actualReader.getHeader()).getMetaDataInInputOrder(),
                ((VCFHeader) expectedReader.getHeader()).getMetaDataInInputOrder());
    }
}
 
Example #6
Source File: SelectVariantsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(dataProvider = "MaxMinIndelSize")
public void maxIndelSizeTest(final int size, final int otherSize, final int max, final int min, final String op) {

    final byte[] largerAllele = Utils.dupBytes((byte) 'A', size+1);
    final byte[] smallerAllele = Utils.dupBytes((byte) 'A', 1);

    final List<Allele> alleles = new ArrayList<>(2);
    final Allele ref = Allele.create(op.equals("I") ? smallerAllele : largerAllele, true);
    final Allele alt = Allele.create(op.equals("D") ? smallerAllele : largerAllele, false);
    alleles.add(ref);
    alleles.add(alt);

    final VariantContext vc = new VariantContextBuilder("test", "1", 10, 10 + ref.length() - 1, alleles).make();

    final boolean hasIndelTooLargeOrSmall = SelectVariants.containsIndelLargerOrSmallerThan(vc, max, min);
    Assert.assertEquals(hasIndelTooLargeOrSmall, size > max || size < min);
}
 
Example #7
Source File: ReadOrientationFilter.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@VisibleForTesting
double artifactProbability(final ReferenceContext referenceContext, final VariantContext vc, final Genotype g) {
    // As of June 2018, genotype is hom ref iff we have the normal sample, but this may change in the future
    // TODO: handle MNVs
    if (g.isHomRef() || (!vc.isSNP() && !vc.isMNP()) ){
        return 0;
    } else if (!artifactPriorCollections.containsKey(g.getSampleName())) {
        return 0;
    }

    final double[] tumorLods = VariantContextGetters.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY, () -> null, -1);
    final int indexOfMaxTumorLod = MathUtils.maxElementIndex(tumorLods);
    final Allele altAllele = vc.getAlternateAllele(indexOfMaxTumorLod);
    final byte[] altBases = altAllele.getBases();

    // for MNVs, treat each base as an independent substitution and take the maximum of all error probabilities
    return IntStream.range(0, altBases.length).mapToDouble(n -> {
        final Nucleotide altBase = Nucleotide.valueOf(new String(new byte[] {altBases[n]}));

        return artifactProbability(referenceContext, vc.getStart() + n, g, indexOfMaxTumorLod, altBase);
    }).max().orElse(0.0);

}
 
Example #8
Source File: MNVMerger.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
@NotNull
private static List<Allele> createMnvAlleles(@NotNull final List<VariantContext> variants,
        @NotNull final Map<Integer, Character> gapReads) {
    final StringBuilder refBases = new StringBuilder();
    final StringBuilder altBases = new StringBuilder();
    int currentPosition = variants.get(0).getStart();
    for (final VariantContext variant : variants) {
        while (currentPosition != variant.getStart()) {
            final Character gapRead = gapReads.get(currentPosition);
            refBases.append(gapRead);
            altBases.append(gapRead);
            currentPosition++;
        }
        refBases.append(variant.getReference().getBaseString());
        altBases.append(variant.getAlternateAllele(0).getBaseString());
        currentPosition = variant.getStart() + variant.getReference().length();
    }
    final Allele ref = Allele.create(refBases.toString(), true);
    final Allele alt = Allele.create(altBases.toString(), false);
    return Lists.newArrayList(ref, alt);
}
 
Example #9
Source File: SomaticRefContextEnrichment.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
@NotNull
static Pair<Integer, String> relativePositionAndRef(@NotNull final IndexedFastaSequenceFile reference, @NotNull final VariantContext variant) {
    final int refLength = variant.getReference().getBaseString().length();
    @Nullable
    final SAMSequenceRecord samSequenceRecord = reference.getSequenceDictionary().getSequence(variant.getContig());
    if (samSequenceRecord == null) {
        LOGGER.warn("Unable to locate contig {} in ref genome", variant.getContig());
        return new Pair<>(-1, Strings.EMPTY);
    }

    final int chromosomeLength = samSequenceRecord.getSequenceLength();
    long positionBeforeEvent = variant.getStart();

    long start = Math.max(positionBeforeEvent - 100, 1);
    long end = Math.min(positionBeforeEvent + refLength + 100 - 1, chromosomeLength - 1);
    int relativePosition = (int) (positionBeforeEvent - start);
    final String sequence;
    if (start < chromosomeLength && end < chromosomeLength) {
        sequence = reference.getSubsequenceAt(variant.getContig(), start, end).getBaseString();
    } else {
        sequence = Strings.EMPTY;
        LOGGER.warn("Requested base sequence outside of chromosome region!");
    }
    return new Pair<>(relativePosition, sequence);
}
 
Example #10
Source File: VariantContextSingletonFilterTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test
public void filterOutHetSinglto32() {
 VariantContextBuilder b = new VariantContextBuilder();
 Allele a1 = Allele.create("A", true);
 Allele a2 = Allele.create("T", false);

 final List<Allele> allelesHet = new ArrayList<>(Arrays.asList(a1,a2));
 final List<Allele> allelesRef = new ArrayList<>(Arrays.asList(a1,a1));
 final List<Allele> allelesAlt = new ArrayList<>(Arrays.asList(a2,a2));

 // this does not have a het singleton because it has an alt.
 Collection<Genotype> genotypes = new ArrayList<>();
 genotypes.add(GenotypeBuilder.create("donor1", allelesRef));
 genotypes.add(GenotypeBuilder.create("donor2", allelesRef));
 genotypes.add(GenotypeBuilder.create("donor3", allelesAlt));

 VariantContext vc = b.alleles(allelesHet).start(1).stop(1).chr("1").genotypes(genotypes).make();
 Assert.assertNotNull(vc);

 Iterator<VariantContext> underlyingIterator = Collections.emptyIterator();

 VariantContextSingletonFilter f = new VariantContextSingletonFilter(underlyingIterator, true);
 boolean t1 = f.filterOut(vc);
 Assert.assertTrue(t1);

}
 
Example #11
Source File: CNVInputReader.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Loads an external cnv call list and returns the results in an SVIntervalTree. NB: the contig indices in the SVIntervalTree
 * are based on the sequence indices in the SAM header, _NOT_ the ReadMetadata (which we might not have access to at this
 * time).
 */
public static SVIntervalTree<VariantContext> loadCNVCalls(final String cnvCallsFile,
                                                           final SAMFileHeader headerForReads) {
    Utils.validate(cnvCallsFile != null, "Can't load null CNV calls file");
    try ( final FeatureDataSource<VariantContext> dataSource = new FeatureDataSource<>(cnvCallsFile, null, 0, null) ) {

        final String sampleId = SVUtils.getSampleId(headerForReads);
        validateCNVcallDataSource(headerForReads, sampleId, dataSource);

        final SVIntervalTree<VariantContext> cnvCallTree = new SVIntervalTree<>();
        Utils.stream(dataSource.iterator())
                .map(vc -> new VariantContextBuilder(vc).genotypes(vc.getGenotype(sampleId)).make()) // forces a decode of the genotype for serialization purposes
                .map(vc -> new Tuple2<>(new SVInterval(headerForReads.getSequenceIndex(vc.getContig()), vc.getStart(), vc.getEnd()),vc))
                .forEach(pv -> cnvCallTree.put(pv._1(), pv._2()));
        return cnvCallTree;
    }
}
 
Example #12
Source File: CombineGVCFsIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test()
public void testTetraploidRun() throws IOException {
    final File output = createTempFile("combinegvcfs", ".vcf");

    final ArgumentsBuilder args = new ArgumentsBuilder();
    args.addReference(new File(b37_reference_20_21))
            .addOutput(output);
    args.add("variant",getToolTestDataDir()+"tetraploid-gvcf-1.vcf");
    args.add("variant",getToolTestDataDir()+"tetraploid-gvcf-2.vcf");
    args.add("variant",getToolTestDataDir()+"tetraploid-gvcf-3.vcf");
    args.add("intervals", getToolTestDataDir() + "tetraploid-gvcfs.interval_list");

    runCommandLine(args);

    final List<VariantContext> expectedVC = getVariantContexts(getTestFile("tetraploidRun.GATK3.g.vcf"));
    final List<VariantContext> actualVC = getVariantContexts(output);
    final VCFHeader header = getHeaderFromFile(output);
    assertForEachElementInLists(actualVC, expectedVC, (a, e) -> VariantContextTestUtils.assertVariantContextsAreEqualAlleleOrderIndependent(a, e, Arrays.asList(), Collections.emptyList(), header));

}
 
Example #13
Source File: RemoveNearbyIndels.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
public void add(final VariantContext vc) {
    if (lastIndel == null && vc.isIndel()) {
        buffer.add(vc);
        lastIndel = vc;
    } else if (nearby(lastIndel, vc)) {
        if (vc.isIndel()) {
            emitAllNonIndels(); // throw out {@code lastIndel} and {@code vc}
        } else {
            buffer.add(vc);
        }
    } else {
        emitAllVariants();
        buffer.add(vc);
    }

    lastIndel = vc.isIndel() ? vc : lastIndel;
}
 
Example #14
Source File: AnnotatedVariantProducer.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@VisibleForTesting
static VariantContextBuilder annotateWithExternalCNVCalls(final String recordContig, final int pos, final int end,
                                                          final VariantContextBuilder inputBuilder,
                                                          final Broadcast<SAMSequenceDictionary> broadcastSequenceDictionary,
                                                          final Broadcast<SVIntervalTree<VariantContext>> broadcastCNVCalls,
                                                          final String sampleId) {
    if (broadcastCNVCalls == null)
        return inputBuilder;
    final SVInterval variantInterval = new SVInterval(broadcastSequenceDictionary.getValue().getSequenceIndex(recordContig), pos, end);
    final SVIntervalTree<VariantContext> cnvCallTree = broadcastCNVCalls.getValue();
    final String cnvCallAnnotation =
            Utils.stream(cnvCallTree.overlappers(variantInterval))
                    .map(overlapper -> formatExternalCNVCallAnnotation(overlapper.getValue(), sampleId))
                    .collect(Collectors.joining(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR));
    if (!cnvCallAnnotation.isEmpty()) {
        return inputBuilder.attribute(GATKSVVCFConstants.EXTERNAL_CNV_CALLS, cnvCallAnnotation);
    } else
        return inputBuilder;
}
 
Example #15
Source File: FilterMutectCalls.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
protected void nthPassApply(final VariantContext variant,
                            final ReadsContext readsContext,
                            final ReferenceContext referenceContext,
                            final FeatureContext featureContext,
                            final int n) {
    ParamUtils.isPositiveOrZero(n, "Passes must start at the 0th pass.");
    if (n <= NUMBER_OF_LEARNING_PASSES) {
        filteringEngine.accumulateData(variant, referenceContext);
    } else if (n == NUMBER_OF_LEARNING_PASSES + 1) {
        vcfWriter.add(filteringEngine.applyFiltersAndAccumulateOutputStats(variant, referenceContext));
    } else {
        throw new GATKException.ShouldNeverReachHereException("This walker should never reach (zero-indexed) pass " + n);
    }
}
 
Example #16
Source File: VariantHotspotFile.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@NotNull
private static VariantHotspot fromVariantContext(@NotNull final VariantContext context) {
    return ImmutableVariantHotspotImpl.builder()
            .chromosome(context.getContig())
            .position(context.getStart())
            .ref(context.getReference().getBaseString())
            .alt(context.getAlternateAllele(0).getBaseString())
            .build();
}
 
Example #17
Source File: CNNScoreVariantsIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private void assertInfoFieldsAreClose(File actualVcf, File expectedVcf, String infoKey){
    Iterator<VariantContext> expectedVi = VariantContextTestUtils.streamVcf(expectedVcf).collect(Collectors.toList()).iterator();
    Iterator<VariantContext> actualVi = VariantContextTestUtils.streamVcf(actualVcf).collect(Collectors.toList()).iterator();
    while (expectedVi.hasNext() && actualVi.hasNext()) {
        VariantContext expectedVc = expectedVi.next();
        VariantContext actualVc = actualVi.next();
        double expectedScore = expectedVc.getAttributeAsDouble(infoKey, 0.0); // Different defaults trigger failures on missing scores
        double actualScore = actualVc.getAttributeAsDouble(infoKey, EPSILON+1.0);
        double diff = Math.abs(expectedScore-actualScore);
        Assert.assertTrue(diff < EPSILON);
        VariantContextTestUtils.assertVariantContextsAreEqual(actualVc, expectedVc, Collections.singletonList(infoKey), Collections.emptyList());
    }
    Assert.assertTrue(!expectedVi.hasNext() && !actualVi.hasNext());
}
 
Example #18
Source File: CountingVariantFilter.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public boolean test(final VariantContext variant) {
    final boolean accept = delegateFilter.test(variant);
    if (!accept) {
        filteredCount++;
    }
    return accept;
}
 
Example #19
Source File: VariantWalkerIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public void apply(
        VariantContext variant,
        ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext ) {
    Assert.assertEquals(readsContext.hasBackingDataSource(), hasBackingReadSource);
    if (hasBackingReadSource) {
        Iterator<GATKRead> it = readsContext.iterator();
        while (it.hasNext()) {
            Assert.assertTrue(backingReads.contains(it.next().getName()));
        }
    }
}
 
Example #20
Source File: SimpleTsvOutputRendererUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "provideForAliasing", description = "Test aliasing both when funcotation fields get to the output and when that is disallowed.")
public void testAliasing(final LinkedHashMap<String, String> unaccountedForDefaultAnnotations,
                         final LinkedHashMap<String, String> unaccountedForOverrideAnnotations,
                         final Set<String> excludedOutputFields, final boolean isWriteFuncotationFieldsNotInConfig,
                         final LinkedHashMap<String, List<String>> columnNameToAliasesMap,
                         final VariantContext segVC, final FuncotationMap funcotationMap,
                         final List<String> gtAliasKeys, final List<String> gtAliasFuncotationFields, final List<String> gtFinalValues) throws IOException {
    final File outputFile = File.createTempFile("testAliasing", ".seg");
    final SimpleTsvOutputRenderer simpleTsvOutputRenderer = new SimpleTsvOutputRenderer(outputFile.toPath(),
            unaccountedForDefaultAnnotations, unaccountedForOverrideAnnotations, excludedOutputFields,
            columnNameToAliasesMap, "TESTING_VERSION", isWriteFuncotationFieldsNotInConfig);

    // You must write one record since SimpleTsvOutputRenderer lazy loads the writer.
    simpleTsvOutputRenderer.write(segVC, funcotationMap);

    // Test that all columns are in the output and the proper aliases are set up.  A whitebox test.
    final LinkedHashMap<String, String> columnNameToFuncotationFieldMap = simpleTsvOutputRenderer.getColumnNameToFuncotationFieldMap();
    Assert.assertEquals(columnNameToFuncotationFieldMap.keySet(), new LinkedHashSet<>(gtAliasKeys), "Alias key did not correspond to ground truth.");
    Assert.assertEquals(Lists.newArrayList(columnNameToFuncotationFieldMap.values()), gtAliasFuncotationFields, "Alias value did not correspond to ground truth.");

    // Check the aliasing to the values.  A whitebox test
    // This next line assumes that all test funcotation maps have the same tx ID (None) and alternate allele (copy neutral)
    final LinkedHashMap<String, String> columnNameToFuncotationValueMap = SimpleTsvOutputRenderer.createColumnNameToValueMap(columnNameToFuncotationFieldMap,
            funcotationMap, FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY, AnnotatedIntervalToSegmentVariantContextConverter.COPY_NEUTRAL_ALLELE, unaccountedForDefaultAnnotations,
            unaccountedForOverrideAnnotations, excludedOutputFields);
    Assert.assertEquals(columnNameToFuncotationValueMap.values(), gtFinalValues);

    // Get the actual values in the columns
    simpleTsvOutputRenderer.close();

    // Check the output file contents
    final List<LinkedHashMap<String, String>> gtOutputRecords = Collections.singletonList(
            FuncotatorUtils.createLinkedHashMapFromLists(gtAliasKeys, gtFinalValues));
    FuncotatorTestUtils.assertTsvFile(outputFile, gtOutputRecords);
}
 
Example #21
Source File: SVVCFReader.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public static SVIntervalTree<String> readBreakpointsFromTruthVCF(final String truthVCF,
                                                                 final SAMSequenceDictionary dictionary,
                                                                 final int padding ) {
    SVIntervalTree<String> breakpoints = new SVIntervalTree<>();
    try ( final FeatureDataSource<VariantContext> dataSource =
                  new FeatureDataSource<>(truthVCF, null, 0, VariantContext.class) ) {
        for ( final VariantContext vc : dataSource ) {
            final StructuralVariantType svType = vc.getStructuralVariantType();
            if ( svType == null ) continue;
            final String eventName = vc.getID();
            final int contigID = dictionary.getSequenceIndex(vc.getContig());
            if ( contigID < 0 ) {
                throw new UserException("VCF contig " + vc.getContig() + " does not appear in dictionary.");
            }
            final int start = vc.getStart();
            switch ( svType ) {
                case DEL:
                case INV:
                case CNV:
                    final int end = vc.getEnd();
                    breakpoints.put(new SVInterval(contigID,start-padding, end+padding), eventName);
                    break;
                case INS:
                case DUP:
                case BND:
                    breakpoints.put(new SVInterval(contigID,start-padding, start+padding), eventName);
                    break;
            }
        }
    }
    return breakpoints;
}
 
Example #22
Source File: VcfOutputRendererUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/** Test that the exclusion list overrides the manually specified annotations */
@Test
public void testExclusionListOverridesManualDefaultAnnotations() {
    final Pair<VCFHeader, List<VariantContext>> entireInputVcf =  VariantContextTestUtils.readEntireVCFIntoMemory(TEST_VCF);
    final File outFile = createTempFile("vcf_output_renderer_exclusion", ".vcf");
    final VariantContextWriter vcfWriter = GATKVariantContextUtils.createVCFWriter(outFile.toPath(),null, false);

    final LinkedHashMap<String, String> dummyDefaults = new LinkedHashMap<>();
    dummyDefaults.put("FOO", "BAR");
    dummyDefaults.put("BAZ", "HUH?");

    final VcfOutputRenderer vcfOutputRenderer = new VcfOutputRenderer(vcfWriter,
        new ArrayList<>(), entireInputVcf.getLeft(), new LinkedHashMap<>(dummyDefaults),
        new LinkedHashMap<>(),
        new HashSet<>(), new HashSet<>(Arrays.asList("BAZ", "AC")), "Unknown");

    final VariantContext variant = entireInputVcf.getRight().get(0);

    final FuncotationMap funcotationMap = FuncotationMap.createNoTranscriptInfo(Collections.emptyList());
    vcfOutputRenderer.write(variant, funcotationMap);
    vcfOutputRenderer.close();

    // Check the output
    final Pair<VCFHeader, List<VariantContext>> tempVcf =  VariantContextTestUtils.readEntireVCFIntoMemory(outFile.getAbsolutePath());
    final VariantContext tempVariant = tempVcf.getRight().get(0);
    final String[] funcotatorKeys = FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(tempVcf.getLeft().getInfoHeaderLine("FUNCOTATION").getDescription());
    Assert.assertEquals(funcotatorKeys.length,1);
    Assert.assertEquals(funcotatorKeys[0],"FOO");
    final FuncotationMap tempFuncotationMap =
            FuncotationMap.createAsAllTableFuncotationsFromVcf(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY, funcotatorKeys,
                    tempVariant.getAttributeAsString("FUNCOTATION", ""), tempVariant.getAlternateAllele(0), "TEST");
    Assert.assertTrue(tempFuncotationMap.get(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY).get(0).hasField("FOO"));
    Assert.assertEquals(tempFuncotationMap.get(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY).get(0).getField("FOO"), "BAR");
    Assert.assertFalse(tempFuncotationMap.get(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY).get(0).hasField("BAZ"));

    // IMPORTANT: If the field is not a proper funcotation in VCFs, it will not be excluded.  I.e. if an input VCF has an excluded field, it will not be excluded.
    Assert.assertEquals(tempVariant.getAttribute("AC"), "1");
}
 
Example #23
Source File: LiftoverVcfTest.java    From picard with MIT License 5 votes vote down vote up
@Test(dataProvider = "leftAlignAllelesData")
public void testLeftAlignVariants(final VariantContext source, final ReferenceSequence reference, final VariantContext result) {
    VariantContextBuilder vcb = new VariantContextBuilder(source);

    LiftoverUtils.leftAlignVariant(vcb, source.getStart(), source.getEnd(), source.getAlleles(), reference);
    vcb.genotypes(LiftoverUtils.fixGenotypes(source.getGenotypes(), source.getAlleles(), vcb.getAlleles()));

    VcfTestUtils.assertEquals(vcb.make(), result);
}
 
Example #24
Source File: CoverageUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private VariantContext makeVC() {
    final GenotypesContext testGC = GenotypesContext.create(2);
    final Allele refAllele = Allele.create("A", true);
    final Allele altAllele = Allele.create("T");

    return (new VariantContextBuilder())
            .alleles(Arrays.asList(refAllele, altAllele)).chr("1").start(15L).stop(15L).genotypes(testGC).make();
}
 
Example #25
Source File: OrientationBiasUtils.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/** Includes complements.  Excludes filtered variant contexts.  Excludes genotypes that were filtered by something other than the orientation bias filter. */
@VisibleForTesting
static long calculateNumTransition(final String sampleName, final List<VariantContext> variantContexts, final Transition transition) {
    final Transition complement = transition.complement();
    return getNumArtifactGenotypeStream(sampleName, variantContexts, transition, complement)
            .filter(g -> (g.getFilters() == null) || (g.getFilters().equals(OrientationBiasFilterConstants.IS_ORIENTATION_BIAS_CUT)))
            .count();
}
 
Example #26
Source File: ChromosomePipeline.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
public ChromosomePipeline(@NotNull final String chromosome, @NotNull final SageConfig config, @NotNull final Executor executor,
        @NotNull final List<VariantHotspot> hotspots, @NotNull final List<GenomeRegion> panelRegions,
        @NotNull final List<GenomeRegion> highConfidenceRegions, final Map<String, QualityRecalibrationMap> qualityRecalibrationMap,
        final Consumer<VariantContext> consumer)
        throws IOException {
    this.chromosome = chromosome;
    this.config = config;
    this.refGenome = new IndexedFastaSequenceFile(new File(config.refGenome()));
    this.consumer = consumer;
    this.sageVariantPipeline =
            new SomaticPipeline(config, executor, refGenome, hotspots, panelRegions, highConfidenceRegions, qualityRecalibrationMap);
}
 
Example #27
Source File: SimpleKeyXsvFuncotationFactory.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private List<Funcotation> createDefaultFuncotationsOnVariantHelper( final VariantContext variant, final ReferenceContext referenceContext, final Set<Allele> annotatedAltAlleles  ) {

        final List<Funcotation> funcotationList = new ArrayList<>();

        final List<Allele> alternateAlleles = variant.getAlternateAlleles();

        for ( final Allele altAllele : alternateAlleles ) {
            if ( !annotatedAltAlleles.contains(altAllele) ) {
                funcotationList.add(TableFuncotation.create(annotationColumnNames, emptyAnnotationList, altAllele, name, null));
            }
        }

        return funcotationList;
    }
 
Example #28
Source File: KataegisQueueTest.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@Test
public void testAverageDistanceSpread() {
    final VariantContext context1 = create("1", 1101, true);
    final VariantContext context2 = create("1", 2102, true);
    final VariantContext context3 = create("1", 3103, true);
    final VariantContext context4 = create("1", 4104, true);
    final VariantContext context5 = create("1", 5105, true);
    final VariantContext context6 = create("1", 6103, true);

    List<VariantContext> result = kataegis(Lists.newArrayList(context1, context2, context3, context4, context5, context6));
    assertEquals(6, result.size());
    assertEquals("TST_1", result.get(0).getAttribute(KataegisEnrichment.KATAEGIS_FLAG));
}
 
Example #29
Source File: UpdateVcfSequenceDictionary.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath());

    final VCFFileReader fileReader = new VCFFileReader(INPUT, false);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(samSequenceDictionary)
            .clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);

    final VariantContextWriter vcfWriter = builder.setOutputFile(OUTPUT).build();
    fileHeader.setSequenceDictionary(samSequenceDictionary);
    vcfWriter.writeHeader(fileHeader);

    final ProgressLogger progress = new ProgressLogger(log, 10000);
    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        vcfWriter.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(fileReader);
    vcfWriter.close();

    return 0;
}
 
Example #30
Source File: StrelkaPostProcessTest.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@Test
public void filtersHCRegionIndelBelowThreshold() {
    final VariantContext indelVariant = VARIANTS.get(10);
    assertEquals(0.5, allelicFrequency(indelVariant), 0.001);
    assertEquals(2, qualityScore(indelVariant));
    assertTrue(hcBed.includes(variantGenomePosition(indelVariant)));
    assertFalse(victim.test(indelVariant));
}