Java Code Examples for htsjdk.variant.variantcontext.VariantContext#getID()

The following examples show how to use htsjdk.variant.variantcontext.VariantContext#getID() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SomaticVariantFactory.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
private static void attachIDAndCosmicAnnotations(@NotNull final ImmutableSomaticVariantImpl.Builder builder,
        @NotNull VariantContext context, @NotNull CanonicalAnnotation canonicalAnnotationFactory) {
    final String ID = context.getID();
    final List<String> cosmicIDs = Lists.newArrayList();
    if (!ID.isEmpty()) {
        final String[] ids = ID.split(ID_SEPARATOR);
        for (final String id : ids) {
            if (id.contains(DBSNP_IDENTIFIER)) {
                builder.dbsnpID(id);
            } else if (id.contains(COSMIC_IDENTIFIER)) {
                cosmicIDs.add(id);
            }
        }
    }
    builder.cosmicIDs(cosmicIDs);

    final List<CosmicAnnotation> cosmicAnnotations = CosmicAnnotationFactory.fromContext(context);
    final Optional<CosmicAnnotation> canonicalCosmicAnnotation =
            canonicalAnnotationFactory.canonicalCosmicAnnotation(cosmicAnnotations);

    if (canonicalCosmicAnnotation.isPresent()) {
        builder.canonicalCosmicID(canonicalCosmicAnnotation.get().id());
    } else if (!cosmicIDs.isEmpty()) {
        builder.canonicalCosmicID(cosmicIDs.get(0));
    }
}
 
Example 2
Source File: SegmentedCpxVariantSimpleVariantExtractor.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@VisibleForTesting
RelevantAttributes(final VariantContext multiSegmentComplexVar) {
    id = multiSegmentComplexVar.getID();
    referenceSegments = SVUtils.getAttributeAsStringList(multiSegmentComplexVar, CPX_SV_REF_SEGMENTS)
            .stream().map(SimpleInterval::new).collect(Collectors.toList());
    altArrangements = SVUtils.getAttributeAsStringList(multiSegmentComplexVar, CPX_EVENT_ALT_ARRANGEMENTS);
}
 
Example 3
Source File: SegmentedCpxVariantSimpleVariantExtractor.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private AnnotatedInterval(final VariantContext vc) {
    sourceVC = vc;
    interval = new SimpleInterval( vc.getContig(), vc.getStart(), vc.getEnd());
    id = vc.getID();
    type = vc.getAttributeAsString(SVTYPE, "");
    svlen = vc.getAttributeAsInt(SVLEN, 0);
    alleles = vc.getAlleles();
}
 
Example 4
Source File: SegmentedCpxVariantSimpleVariantExtractor.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Depending on how the ref segment is present in alt arrangement (if at all), logic as follows (order is important):
 * <ul>
 *     <li>
 *         if ref segment appear inverted and large enough
 *         <ul>
 *             <li> INV call is warranted </li>
 *             <li> INS call(s) before and after the INV, if inserted sequence long enough </li>
 *         </ul>
 *     </li>
 *
 *     <li>
 *         otherwise if ref segment is present as-is, i.e. no deletion call can be made,
 *         make insertion calls when possible
 *     </li>
 *     <li>
 *         otherwise
 *         <ul>
 *             <li> if the segment is large enough, make a DEL call, and insertion calls when possible </li>
 *             <li> otherwise a single fat INS call</li>
 *         </ul>
 *     </li>
 * </ul>
 *
 * <p>
 *     Note that the above logic has a bias towards getting INV calls, because
 *     when the (large enough) reference segment appears both as-is and inverted,
 *     the above logic will emit at least an INV call,
 *     whereas the (inverted) duplication(s) could also be reported as an DUP call as well, but...
 * </p>
 */
@Override
List<VariantContext> extract(final VariantContext complexVC, final ReferenceMultiSparkSource reference) {

    final List<String> segments = SVUtils.getAttributeAsStringList(complexVC, CPX_SV_REF_SEGMENTS);
    if (segments.isEmpty()) return whenZeroSegments(complexVC, reference);

    final SimpleInterval refSegment = new SimpleInterval(segments.get(0));
    final List<String> altArrangement = SVUtils.getAttributeAsStringList(complexVC, CPX_EVENT_ALT_ARRANGEMENTS);
    final int altSeqLength = complexVC.getAttributeAsString(SEQ_ALT_HAPLOTYPE, "").length();

    final List<VariantContextBuilder> result = new ArrayList<>();

    final int asIsAppearanceIdx = altArrangement.indexOf("1");
    final int invertedAppearanceIdx = altArrangement.indexOf("-1");
    if (invertedAppearanceIdx != -1 && refSegment.size() > EVENT_SIZE_THRESHOLD) { // inversion call
        whenInversionIsWarranted(refSegment, invertedAppearanceIdx, altArrangement, reference, result);
    } else if (asIsAppearanceIdx != -1) { // no inverted appearance or appear inverted but not large enough, and in the mean time appear as-is, so no deletion
        whenNoDeletionIsAllowed(refSegment, asIsAppearanceIdx, altArrangement, altSeqLength, reference, result);
    } else { // no as-is appearance && (inverted appearance might present not not large enough)
        whenNoInvAndNoAsIsAppearance(refSegment, altSeqLength, reference, result);
    }

    final String sourceID = complexVC.getID();
    final List<String> evidenceContigs = SVUtils.getAttributeAsStringList(complexVC, CONTIG_NAMES);
    final List<String> mappingQualities = SVUtils.getAttributeAsStringList(complexVC, MAPPING_QUALITIES);
    final int maxAlignLength = complexVC.getAttributeAsInt(MAX_ALIGN_LENGTH, 0);
    return result.stream()
            .map(vc -> vc.attribute(CPX_EVENT_KEY, sourceID).attribute(CONTIG_NAMES, evidenceContigs)
                         .attribute(MAPPING_QUALITIES, mappingQualities)
                         .attribute(MAX_ALIGN_LENGTH, maxAlignLength).make())
            .collect(Collectors.toList());
}
 
Example 5
Source File: SVVCFReader.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public static SVIntervalTree<String> readBreakpointsFromTruthVCF(final String truthVCF,
                                                                 final SAMSequenceDictionary dictionary,
                                                                 final int padding ) {
    SVIntervalTree<String> breakpoints = new SVIntervalTree<>();
    try ( final FeatureDataSource<VariantContext> dataSource =
                  new FeatureDataSource<>(truthVCF, null, 0, VariantContext.class) ) {
        for ( final VariantContext vc : dataSource ) {
            final StructuralVariantType svType = vc.getStructuralVariantType();
            if ( svType == null ) continue;
            final String eventName = vc.getID();
            final int contigID = dictionary.getSequenceIndex(vc.getContig());
            if ( contigID < 0 ) {
                throw new UserException("VCF contig " + vc.getContig() + " does not appear in dictionary.");
            }
            final int start = vc.getStart();
            switch ( svType ) {
                case DEL:
                case INV:
                case CNV:
                    final int end = vc.getEnd();
                    breakpoints.put(new SVInterval(contigID,start-padding, end+padding), eventName);
                    break;
                case INS:
                case DUP:
                case BND:
                    breakpoints.put(new SVInterval(contigID,start-padding, start+padding), eventName);
                    break;
            }
        }
    }
    return breakpoints;
}
 
Example 6
Source File: CreateSnpIntervalFromVcf.java    From Drop-seq with MIT License 4 votes vote down vote up
public IntervalList processData(final File vcfFile, final File sdFile, final Set<String> sample, int GQThreshold, final boolean hetSNPsOnly) {

		final VCFFileReader reader = new VCFFileReader(vcfFile, false);
		if (!VCFUtils.GQInHeader(reader)) {
			GQThreshold=-1;
			log.info("Genotype Quality [GQ] not found in header.  Disabling GQ_THRESHOLD parameter");
		}
		
		final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
		SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary();
		Set<String> sampleListFinal = sample;
		if (sample==null || sample.isEmpty()) {
			ArrayList<String> s = reader.getFileHeader().getSampleNamesInOrder();
			sampleListFinal=new TreeSet<String>(s);
		}

		if (sdFile != null)
			sequenceDictionary = getSequenceDictionary(sdFile);

		final ProgressLogger progress = new ProgressLogger(this.log, 500000);

		final SAMFileHeader samHeader = new SAMFileHeader();
		samHeader.setSequenceDictionary(sequenceDictionary);
		IntervalList result = new IntervalList(samHeader);

		// Go through the input, find sites we want to keep.
		final PeekableIterator<VariantContext> iterator = new PeekableIterator<>(reader.iterator());

		validateRequestedSamples (iterator, sampleListFinal);

		while (iterator.hasNext()) {
			final VariantContext site = iterator.next();
			progress.record(site.getContig(), site.getStart());
			// for now drop any filtered site.
			if (site.isFiltered())
				continue;
			// move onto the next record if the site is not a SNP or the samples aren't all heterozygous.
			if (!site.isSNP())
				continue;
			if (!sitePassesFilters(site, sampleListFinal, GQThreshold, hetSNPsOnly))
				continue;
			Interval varInt = new Interval(site.getContig(), site.getStart(),
					site.getEnd(), true, site.getID());

			// final Interval site = findHeterozygousSites(full, SAMPLE);
			result.add(varInt);

		}

		CloserUtil.close(iterator);
		CloserUtil.close(reader);
		return (result);
	}
 
Example 7
Source File: MergePedIntoVcf.java    From picard with MIT License 4 votes vote down vote up
/**
 * Writes out the VariantContext objects in the order presented to the supplied output file
 * in VCF format.
 */
private void writeVcf(final CloseableIterator<VariantContext> variants,
                      final File output,
                      final SAMSequenceDictionary dict,
                      final VCFHeader vcfHeader, ZCallPedFile zCallPedFile) {

    try(VariantContextWriter writer = new VariantContextWriterBuilder()
            .setOutputFile(output)
            .setReferenceDictionary(dict)
            .setOptions(VariantContextWriterBuilder.DEFAULT_OPTIONS)
            .build()) {
        writer.writeHeader(vcfHeader);
        while (variants.hasNext()) {
            final VariantContext context = variants.next();

            final VariantContextBuilder builder = new VariantContextBuilder(context);
            if (zCallThresholds.containsKey(context.getID())) {
                final String[] zThresh = zCallThresholds.get(context.getID());
                builder.attribute(InfiniumVcfFields.ZTHRESH_X, zThresh[0]);
                builder.attribute(InfiniumVcfFields.ZTHRESH_Y, zThresh[1]);
            }
            final Genotype originalGenotype = context.getGenotype(0);
            final Map<String, Object> newAttributes = originalGenotype.getExtendedAttributes();
            final VCFEncoder vcfEncoder = new VCFEncoder(vcfHeader, false, false);
            final Map<Allele, String> alleleMap = vcfEncoder.buildAlleleStrings(context);

            final String zCallAlleles = zCallPedFile.getAlleles(context.getID());
            if (zCallAlleles == null) {
                throw new PicardException("No zCall alleles found for snp " + context.getID());
            }
            final List<Allele> zCallPedFileAlleles = buildNewAllelesFromZCall(zCallAlleles, context.getAttributes());
            newAttributes.put(InfiniumVcfFields.GTA, alleleMap.get(originalGenotype.getAllele(0)) + UNPHASED + alleleMap.get(originalGenotype.getAllele(1)));
            newAttributes.put(InfiniumVcfFields.GTZ, alleleMap.get(zCallPedFileAlleles.get(0)) + UNPHASED + alleleMap.get(zCallPedFileAlleles.get(1)));

            final Genotype newGenotype = GenotypeBuilder.create(originalGenotype.getSampleName(), zCallPedFileAlleles,
                    newAttributes);
            builder.genotypes(newGenotype);
            logger.record("0", 0);
            // AC, AF, and AN are recalculated here
            VariantContextUtils.calculateChromosomeCounts(builder, false);
            final VariantContext newContext = builder.make();
            writer.add(newContext);
        }
    }
}
 
Example 8
Source File: HaplotypeMap.java    From picard with MIT License 4 votes vote down vote up
/**
 * Constructs a HaplotypeMap from the provided file.
 */

private void fromVcf(final File file) {
    try ( final VCFFileReader reader = new VCFFileReader(file, false)) {

        final SAMSequenceDictionary dict = reader.getFileHeader().getSequenceDictionary();

        if (dict == null || dict.getSequences().isEmpty()) {
            throw new IllegalStateException("Haplotype map VCF file must contain header: " + file.getAbsolutePath());
        }

        initialize(new SAMFileHeader(dict));

        final Map<String, HaplotypeBlock> anchorToHaplotype = new HashMap<>();

        for (final VariantContext vc : reader) {

            if (vc.getNSamples() > 1) {
                throw new IllegalStateException("Haplotype map VCF file must contain at most one sample: " + file.getAbsolutePath());
            }

            final Genotype gc = vc.getGenotype(0); // may be null
            final boolean hasGc = gc != null;

            if (vc.getAlternateAlleles().size() != 1) {
                throw new IllegalStateException("Haplotype map VCF file must contain exactly one alternate allele per site: " + vc.toString());
            }

            if (!vc.isSNP()) {
                throw new IllegalStateException("Haplotype map VCF file must contain only SNPs: " + vc.toString());
            }

            if (!vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)) {
                throw new IllegalStateException("Haplotype map VCF Variants must have an '"+ VCFConstants.ALLELE_FREQUENCY_KEY + "' INFO field: " + vc.toString());
            }


            if (hasGc && gc.isPhased() && !gc.hasExtendedAttribute(VCFConstants.PHASE_SET_KEY)) {
                throw new IllegalStateException("Haplotype map VCF Variants' genotypes that are phased must have a PhaseSet (" + VCFConstants.PHASE_SET_KEY+")" + vc.toString());
            }

            if (hasGc && gc.isPhased() && !gc.isHet()) {
                throw new IllegalStateException("Haplotype map VCF Variants' genotypes that are phased must be HET" + vc.toString());
            }

            // Then parse them out
            final String chrom = vc.getContig();
            final int pos = vc.getStart();
            final String name = vc.getID();

            final byte ref = vc.getReference().getBases()[0];
            final byte var = vc.getAlternateAllele(0).getBases()[0];

            final double temp_maf = vc.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 0D);
            final boolean swapped = hasGc && !gc.getAllele(0).equals(vc.getReference());

            final byte major, minor;
            final double maf;

            if (swapped) {
                major = var;
                minor = ref;
                maf = 1 - temp_maf;
            } else {
                major = ref;
                minor = var;
                maf = temp_maf;
            }

            final String anchor = anchorFromVc(vc);

            // If it's the anchor snp, start the haplotype
            if (!anchorToHaplotype.containsKey(anchor)) {
                final HaplotypeBlock newBlock = new HaplotypeBlock(maf);
                anchorToHaplotype.put(anchor, newBlock);
            }
            final HaplotypeBlock block = anchorToHaplotype.get(anchor);
            block.addSnp(new Snp(name, chrom, pos, major, minor, maf, null));
        }

        // And add them all now that they are all ready.
        fromHaplotypes(anchorToHaplotype.values());
    }
}
 
Example 9
Source File: ASEReadCounter.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Override
public void apply(AlignmentContext alignmentContext, ReferenceContext referenceContext, FeatureContext featureContext) {
    final String contig = alignmentContext.getContig();
    final long position = alignmentContext.getPosition();

    final char refAllele = (char) referenceContext.getBase();

    final List<VariantContext> VCs = featureContext.getValues(variants);
    if (VCs != null && VCs.size() > 1) {
        throw new UserException("More then one variant context at position: " + contig + ":" + position);
    }
    if (VCs == null || VCs.isEmpty()) {
        return;
    }

    final VariantContext vc = VCs.get(0);
    if (!vc.isBiallelic()) {
        logger.warn("Ignoring site: cannot run ASE on non-biallelic sites: " + vc.toString());
        return;
    }

    if (vc.getHetCount() < 1) {
        logger.warn("Ignoring site: variant is not het at postion: " + contig + ":" + position);
        return;
    }

    if (vc.getNAlleles() == 1 || vc.getAlternateAllele(0).getBases().length == 0) {
        throw new UserException("The file of variant sites must contain heterozygous sites and cannot be a GVCF file containing <NON_REF> alleles.");
    }

    final char altAllele = (char) vc.getAlternateAllele(0).getBases()[0];

    final String siteID = vc.getID();
    final ReadPileup pileup = filterPileup(alignmentContext.getBasePileup(), countType);

    // count up the depths of all and QC+ bases
    final String line = calculateLineForSite(pileup, siteID, refAllele, altAllele);
    if (line != null) {
        outputStream.println(line);
    }
}
 
Example 10
Source File: SegmentedCpxVariantSimpleVariantExtractor.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Override
List<VariantContext> extract(final VariantContext complexVC, final ReferenceMultiSparkSource reference) {

    final List<SimpleInterval> refSegments =
            SVUtils.getAttributeAsStringList(complexVC, CPX_SV_REF_SEGMENTS).stream()
                    .map(SimpleInterval::new)
                    .collect(Collectors.toList());

    final List<String> altArrangement = SVUtils.getAttributeAsStringList(complexVC, CPX_EVENT_ALT_ARRANGEMENTS);

    final Tuple3<Set<SimpleInterval>, Set<Integer>, List<Integer>> missingAndPresentAndInvertedSegments = getMissingAndPresentAndInvertedSegments(refSegments, altArrangement);
    final Set<SimpleInterval> missingSegments = missingAndPresentAndInvertedSegments._1();
    final Set<Integer> presentSegments = missingAndPresentAndInvertedSegments._2();
    final List<Integer> invertedSegments = missingAndPresentAndInvertedSegments._3();

    final List<VariantContextBuilder> result = new ArrayList<>();

    // if affected ref sequence found as is (trusting the aligner), then only output front and/or rear insertions
    final int idx = findAllSegments(altArrangement, refSegments.size());
    if ( idx >= 0 ) {
        whenAllSegmentsAppearAsIs(complexVC, reference, refSegments, altArrangement, result, idx);
    } else {

        // inversions
        if (!invertedSegments.isEmpty()) {
            extractInversions(reference, refSegments, presentSegments, invertedSegments, result);
        }

        // deletions
        if (!missingSegments.isEmpty()) {
            extractDeletions(reference, missingSegments, result);
        }

        // head and tail insertions only
        extractFrontAndRearInsertions(complexVC, refSegments, altArrangement, reference, result);
    }

    final String sourceID = complexVC.getID();
    final List<String> evidenceContigs = SVUtils.getAttributeAsStringList(complexVC, CONTIG_NAMES);
    final List<String> mappingQualities = SVUtils.getAttributeAsStringList(complexVC, MAPPING_QUALITIES);
    final int maxAlignLength = complexVC.getAttributeAsInt(MAX_ALIGN_LENGTH, 0);

    return result.stream()
            .map(vc -> vc.attribute(CPX_EVENT_KEY, sourceID).attribute(CONTIG_NAMES, evidenceContigs)
                         .attribute(MAPPING_QUALITIES, mappingQualities)
                         .attribute(MAX_ALIGN_LENGTH, maxAlignLength).make())
            .collect(Collectors.toList());
}
 
Example 11
Source File: AnnotatedVariantProducer.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private static String formatExternalCNVCallAnnotation(final VariantContext externalCNVCall, String sampleId) {
    return externalCNVCall.getID() + ":"
            + externalCNVCall.getGenotype(sampleId).getExtendedAttribute(GATKSVVCFConstants.COPY_NUMBER_FORMAT) + ":"
            + externalCNVCall.getGenotype(sampleId).getExtendedAttribute(GATKSVVCFConstants.COPY_NUMBER_QUALITY_FORMAT);
}