Java Code Examples for htsjdk.variant.variantcontext.VariantContextBuilder#filter()

The following examples show how to use htsjdk.variant.variantcontext.VariantContextBuilder#filter() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FilterVariantTranches.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) {
    final VariantContextBuilder builder = new VariantContextBuilder(variant);

    if (removeOldFilters) {
        builder.unfiltered();
    }

    if (variant.hasAttribute(infoKey)) {
        final double score = Double.parseDouble((String) variant.getAttribute(infoKey));
        if (variant.isSNP() && snpCutoffs.size() != 0 && isTrancheFiltered(score, snpCutoffs)) {
            builder.filter(filterStringFromScore(SNPString, score, snpTranches, snpCutoffs));
            filteredSnps++;
        } else if (variant.isIndel() && indelCutoffs.size() != 0 && isTrancheFiltered(score, indelCutoffs)) {
            builder.filter(filterStringFromScore(INDELString, score, indelTranches, indelCutoffs));
            filteredIndels++;
        }
    }

    if (builder.getFilters() == null || builder.getFilters().size() == 0){
        builder.passFilters();
    }
    
    vcfWriter.add(builder.make());
}
 
Example 2
Source File: FilterFuncotations.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Mark a variant as matching a set of Funcotation filters, or as matching no filters.
 */
private VariantContext applyFilters(final VariantContext variant, final Set<String> matchingFilters) {
    final VariantContextBuilder variantContextBuilder = new VariantContextBuilder(variant);
    final boolean isSignificant = !matchingFilters.isEmpty();

    // Mark the individual filters that make the variant significant, if any.
    final String clinicalSignificance = isSignificant ?
            String.join(FilterFuncotationsConstants.FILTER_DELIMITER, matchingFilters) :
            FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT;
    variantContextBuilder.attribute(FilterFuncotationsConstants.CLINSIG_INFO_KEY, clinicalSignificance);

    // Also set the filter field for insignificant variants, to make it easier for
    // downstream tools to extract out the interesting data.
    if (isSignificant) {
        variantContextBuilder.passFilters();
    } else {
        variantContextBuilder.filter(FilterFuncotationsConstants.NOT_CLINSIG_FILTER);
    }

    return variantContextBuilder.make();
}
 
Example 3
Source File: MTLowHeteroplasmyFilterTool.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) {
    VariantContextBuilder vcb = new VariantContextBuilder(variant);
    if (failedLowHet) {
        List<Boolean> appliedFilter = areAllelesArtifacts(variant);
        if (!appliedFilter.contains(Boolean.FALSE)) {
            vcb.filter(filterName());
        }
        if (appliedFilter.contains(Boolean.TRUE)) {
            vcb.attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, AlleleFilterUtils.getMergedASFilterString(variant, appliedFilter, filterName()));
        }
    }
    vcfWriter.add(vcb.make());
}
 
Example 4
Source File: NuMTFilterTool.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public void apply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) {
    VariantContextBuilder vcb = new VariantContextBuilder(variant);
    LinkedHashMap<Allele, List<Integer>> dataByAllele = Mutect2AlleleFilter.getDataByAllele(variant, Genotype::hasAD, this::getData, null);
    List<Boolean> appliedFilter = dataByAllele.entrySet().stream()
            .filter(entry -> !variant.getReference().equals(entry.getKey()))
            .map(entry -> entry.getValue().stream().max(Integer::compare).orElse(0) < maxAltDepthCutoff).collect(Collectors.toList());
    if (!appliedFilter.contains(Boolean.FALSE)) {
        vcb.filter(filterName());
    }
    if (appliedFilter.contains(Boolean.TRUE)) {
        vcb.attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, AlleleFilterUtils.getMergedASFilterString(variant, appliedFilter, filterName()));
    }
    vcfWriter.add(vcb.make());
}
 
Example 5
Source File: FuncotatorTestUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 *  Create a variant context with the following fields.  Note that the genotype will be empty, as will
 *  INFO annotations.
 *
 * @param reference E.g. {@link FuncotatorReferenceTestUtils#retrieveHg19Chr3Ref}
 * @param contig Never {@code null}
 * @param start Must be positive.
 * @param end Must be positive.
 * @param refAlleleString Valid {@link String} for an allele.  Do not include "*".  Never {@code null}
 * @param altAlleleString Valid {@link String} for an allele.  Do not include "*".  Never {@code null}
 * @param filterString Valid {@link String} for filters (See VCF 4.2 spec).  May be {@code null}.
 * @return a simple, biallelic variant context
 */
public static VariantContext createSimpleVariantContext(final String reference, final String contig,
                                                        final int start,
                                                        final int end,
                                                        final String refAlleleString,
                                                        final String altAlleleString,
                                                        final String filterString) {
    Utils.nonNull(contig);
    Utils.nonNull(refAlleleString);
    Utils.nonNull(altAlleleString);
    ParamUtils.isPositive(start, "Invalid start position: " + start);
    ParamUtils.isPositive(end, "Invalid end position: " + end);

    final Allele refAllele = Allele.create(refAlleleString, true);
    final Allele altAllele = Allele.create(altAlleleString);

    final VariantContextBuilder variantContextBuilder = new VariantContextBuilder(
            reference,
            contig,
            start,
            end,
            Arrays.asList(refAllele, altAllele)
    );

    if ( filterString != null ) {
        variantContextBuilder.filter(filterString);
    }
    else {
        variantContextBuilder.passFilters();
    }

    final VariantContext vc = variantContextBuilder.make();

    return vc;
}
 
Example 6
Source File: HotspotEvidenceVCF.java    From hmftools with GNU General Public License v3.0 4 votes vote down vote up
@NotNull
private VariantContext create(@NotNull final HotspotEvidence hotspotEvidence) {
    final Allele ref = Allele.create(hotspotEvidence.ref(), true);
    final Allele alt = Allele.create(hotspotEvidence.alt(), false);
    final List<Allele> alleles = Lists.newArrayList(ref, alt);

    final Genotype tumor = new GenotypeBuilder(tumorSample).DP(hotspotEvidence.tumorReads())
            .AD(new int[] { hotspotEvidence.tumorRefCount(), hotspotEvidence.tumorAltCount() })
            .alleles(alleles)
            .make();

    final Genotype normal = new GenotypeBuilder(normalSample).DP(hotspotEvidence.normalReads())
            .AD(new int[] { hotspotEvidence.normalRefCount(), hotspotEvidence.normalAltCount() })
            .alleles(alleles)
            .make();

    final VariantContextBuilder builder = new VariantContextBuilder().chr(hotspotEvidence.chromosome())
            .start(hotspotEvidence.position())
            .attribute(HOTSPOT_FLAG, hotspotEvidence.type().toString().toLowerCase())
            .attribute(ALLELIC_FREQUENCY, round(hotspotEvidence.vaf()))
            .computeEndFromAlleles(alleles, (int) hotspotEvidence.position())
            .source(hotspotEvidence.type().toString())
            .genotypes(tumor, normal)
            .alleles(alleles);

    boolean lowConfidence = lowConfidence(hotspotEvidence);
    if (hotspotEvidence.normalAltCount() == 1) {
        double normalHetLikelihood = heterozygousLikelihood(hotspotEvidence.normalReads());
        builder.attribute(GERMLINE_HET_LIKELIHOOD, round(normalHetLikelihood));
        lowConfidence |= Doubles.greaterThan(normalHetLikelihood, maxNormalHetLikelihood);
    }

    if (lowConfidence) {
        builder.filter(LOW_CONFIDENCE);
    } else if (germlineIndel(hotspotEvidence)) {
        builder.filter(GERMLINE_INDEL);
    } else {
        builder.filter(PASS);
    }

    final VariantContext context = builder.make();
    context.getCommonInfo().setLog10PError(hotspotEvidence.qualityScore() / -10d);
    return context;
}
 
Example 7
Source File: VariantToVcf.java    From genomewarp with Apache License 2.0 4 votes vote down vote up
private static VariantContext getContextFromVariant(VCFHeader header, Variant in) {
  // Create allele list
  String refBases = in.getReferenceBases();
  List<String> altBases = in.getAlternateBasesList();
  List<Allele> alleleList = new ArrayList<>();
  alleleList.add(Allele.create(refBases.getBytes(StandardCharsets.UTF_8), true));
  for (String base : altBases) {
    alleleList.add(Allele.create(base.getBytes(StandardCharsets.UTF_8), false));
  }

  // Note htsjdk start/end are both 1-based closed
  VariantContextBuilder builder = new VariantContextBuilder(SOURCE, in.getReferenceName(),
      in.getStart() + 1, in.getEnd(), alleleList);

  // Set Quality
  if (in.getQuality() != 0.0) {
    builder.log10PError(-in.getQuality() / 10);
  }

  // Set names
  int numNames = in.getNamesCount();
  int i = 0;
  String namesString = "";
  for (String name : in.getNamesList()) {
    if (i == numNames - 1) {
      break;
    }
    namesString += name + ",";
  }
  if (numNames == 0) {
    builder.noID();
  } else {
    // Also handle fence post
    builder.id(namesString + in.getNames(numNames - 1));
  }

  // Set filters
  boolean hadFilter = false;
  for (String filter : in.getFilterList()) {
    hadFilter = true;
    if (filter.equals(VCFConstants.PASSES_FILTERS_v4)) {
      builder.passFilters();
      break;
    }
    if (filter.equals(VCFConstants.MISSING_VALUE_v4)) {
      builder.unfiltered();
      break;
    }

    builder.filter(filter);
  }
  if (!hadFilter) {
    builder.unfiltered();
  }

  // Set info
  Map<String, Object> toSet = new HashMap<>();
  for (Map.Entry<String, ListValue> entry : in.getInfo().entrySet()) {
    String currKey = entry.getKey();
    List<Value> currValue = entry.getValue().getValuesList();
    int currSize = currValue.size();

    if (currSize == 0) {
      // If the key is present in the info map, there must
      // be non-zero attributes associated with it
      throw new IllegalStateException(String.format("info field %s has length 0", currKey));
    }

    VCFHeaderLineType type = header.getInfoHeaderLine(currKey).getType();
    if (currSize == 1) {
      toSet.put(currKey, parseObject(currKey, currValue.get(0), type));
      continue;
    }

    List<Object> objectList = new ArrayList<>();
    for (Value val : currValue) {
      objectList.add(parseObject(currKey, val, type));
    }
    toSet.put(currKey, objectList);
  }
  builder.attributes(toSet);

  // Set calls
  List<Genotype> genotypes = new ArrayList<>();
  for (VariantCall vc : in.getCallsList()) {
    genotypes.add(getGenotypeFromVariantCall(header, vc, alleleList));
  }
  if (genotypes.size() == 0) {
    builder.noGenotypes();
  } else {
    builder.genotypes(genotypes);
  }

  return builder.make();
}
 
Example 8
Source File: GtcToVcf.java    From picard with MIT License 4 votes vote down vote up
private VariantContext makeVariantContext(Build37ExtendedIlluminaManifestRecord record, final InfiniumGTCRecord gtcRecord,
                                          final InfiniumEGTFile egtFile, final ProgressLogger progressLogger) {
    // If the record is not flagged as errant in the manifest we include it in the VCF
    Allele A = record.getAlleleA();
    Allele B = record.getAlleleB();
    Allele ref = record.getRefAllele();

    final String chr = record.getB37Chr();
    final Integer position = record.getB37Pos();
    final Integer endPosition = position + ref.length() - 1;
    Integer egtIndex = egtFile.rsNameToIndex.get(record.getName());
    if (egtIndex == null) {
        throw new PicardException("Found no record in cluster file for manifest entry '" + record.getName() + "'");
    }

    progressLogger.record(chr, position);

    // Create list of unique alleles
    final List<Allele> assayAlleles = new ArrayList<>();
    assayAlleles.add(ref);

    if (A.equals(B)) {
        throw new PicardException("Found same allele (" + A.getDisplayString() + ") for A and B ");
    }

    if (!ref.equals(A, true)) {
        assayAlleles.add(A);
    }

    if (!ref.equals(B, true)) {
        assayAlleles.add(B);
    }

    final String sampleName = FilenameUtils.removeExtension(INPUT.getName());
    final Genotype genotype = getGenotype(sampleName, gtcRecord, record, A, B);

    final VariantContextBuilder builder = new VariantContextBuilder();

    builder.source(record.getName());
    builder.chr(chr);
    builder.start(position);
    builder.stop(endPosition);
    builder.alleles(assayAlleles);
    builder.log10PError(VariantContext.NO_LOG10_PERROR);
    builder.id(record.getName());
    builder.genotypes(genotype);

    VariantContextUtils.calculateChromosomeCounts(builder, false);

    //custom info fields
    builder.attribute(InfiniumVcfFields.ALLELE_A, record.getAlleleA());
    builder.attribute(InfiniumVcfFields.ALLELE_B, record.getAlleleB());
    builder.attribute(InfiniumVcfFields.ILLUMINA_STRAND, record.getIlmnStrand());
    builder.attribute(InfiniumVcfFields.PROBE_A, record.getAlleleAProbeSeq());
    builder.attribute(InfiniumVcfFields.PROBE_B, record.getAlleleBProbeSeq());
    builder.attribute(InfiniumVcfFields.BEADSET_ID, record.getBeadSetId());
    builder.attribute(InfiniumVcfFields.ILLUMINA_CHR, record.getChr());
    builder.attribute(InfiniumVcfFields.ILLUMINA_POS, record.getPosition());
    builder.attribute(InfiniumVcfFields.ILLUMINA_BUILD, record.getGenomeBuild());
    builder.attribute(InfiniumVcfFields.SOURCE, record.getSource().replace(' ', '_'));
    builder.attribute(InfiniumVcfFields.GC_SCORE, formatFloatForVcf(egtFile.totalScore[egtIndex]));

    for (InfiniumVcfFields.GENOTYPE_VALUES gtValue : InfiniumVcfFields.GENOTYPE_VALUES.values()) {
        final int ordinalValue = gtValue.ordinal();
        builder.attribute(InfiniumVcfFields.N[ordinalValue], egtFile.n[egtIndex][ordinalValue]);
        builder.attribute(InfiniumVcfFields.DEV_R[ordinalValue], formatFloatForVcf(egtFile.devR[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.MEAN_R[ordinalValue], formatFloatForVcf(egtFile.meanR[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.DEV_THETA[ordinalValue], formatFloatForVcf(egtFile.devTheta[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.MEAN_THETA[ordinalValue], formatFloatForVcf(egtFile.meanTheta[egtIndex][ordinalValue]));

        final EuclideanValues genotypeEuclideanValues = polarToEuclidean(egtFile.meanR[egtIndex][ordinalValue], egtFile.devR[egtIndex][ordinalValue],
                egtFile.meanTheta[egtIndex][ordinalValue], egtFile.devTheta[egtIndex][ordinalValue]);
        builder.attribute(InfiniumVcfFields.DEV_X[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.devX));
        builder.attribute(InfiniumVcfFields.MEAN_X[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.meanX));
        builder.attribute(InfiniumVcfFields.DEV_Y[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.devY));
        builder.attribute(InfiniumVcfFields.MEAN_Y[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.meanY));
    }


    final String rsid = record.getRsId();
    if (StringUtils.isNotEmpty(rsid)) {
        builder.attribute(InfiniumVcfFields.RS_ID, rsid);
    }
    if (egtFile.totalScore[egtIndex] == 0.0) {
        builder.filter(InfiniumVcfFields.ZEROED_OUT_ASSAY);
        if (genotype.isCalled()) {
            if (DO_NOT_ALLOW_CALLS_ON_ZEROED_OUT_ASSAYS) {
                throw new PicardException("Found a call (genotype: " + genotype + ") on a zeroed out Assay!!");
            } else {
                log.warn("Found a call (genotype: " + genotype + ") on a zeroed out Assay. " +
                        "This could occur if you called genotypes on a different cluster file than used here.");
            }
        }
    }
    if (record.isDupe()) {
        builder.filter(InfiniumVcfFields.DUPE);
    }
    return builder.make();
}
 
Example 9
Source File: VariantContextCodec.java    From Hadoop-BAM with MIT License 4 votes vote down vote up
public static VariantContext read(final DataInput in) throws IOException {
	final VariantContextBuilder builder = new VariantContextBuilder();

	int count, len;
	byte[] b;

	len = in.readInt();
	b = new byte[len];
	in.readFully(b);
	final String chrom = new String(b, "UTF-8");
	builder.chr(chrom);

	final int start = in.readInt();
	builder.start(start);
	builder.stop (in.readInt());

	len = in.readInt();
	if (len == 0)
		builder.noID();
	else {
		if (len > b.length) b = new byte[len];
		in.readFully(b, 0, len);
		builder.id(new String(b, 0, len, "UTF-8"));
	}

	count = in.readInt();
	final List<Allele> alleles = new ArrayList<Allele>(count);
	for (int i = 0; i < count; ++i) {
		len = in.readInt();
		if (len > b.length) b = new byte[len];
		in.readFully(b, 0, len);
		alleles.add(Allele.create(Arrays.copyOf(b, len), i == 0));
	}
	builder.alleles(alleles);

	final int qualInt = in.readInt();
	builder.log10PError(
		qualInt == 0x7f800001
			? VariantContext.NO_LOG10_PERROR
			: Float.intBitsToFloat(qualInt));

	count = in.readInt();
	switch (count) {
	case -2: builder.unfiltered(); break;
	case -1: builder.passFilters(); break;
	default:
		while (count-- > 0) {
			len = in.readInt();
			if (len > b.length) b = new byte[len];
			in.readFully(b, 0, len);
			builder.filter(new String(b, 0, len, "UTF-8"));
		}
		break;
	}

	count = in.readInt();
	final Map<String,Object> attrs = new HashMap<String,Object>(count, 1);
	while (count-- > 0) {
		len = in.readInt();
		if (len > b.length) b = new byte[len];
		in.readFully(b, 0, len);
		attrs.put(new String(b, 0, len, "UTF-8"), decodeAttrVal(in));
	}
	builder.attributes(attrs);

	count = in.readInt();
	final byte genoType = in.readByte();
	len = in.readInt();

	// Resize b even if it's already big enough, minimizing the amount of
	// memory LazyGenotypesContext hangs on to.
	b = new byte[len];
	in.readFully(b);

	switch (genoType) {
	case 0:
		builder.genotypesNoValidation(
			new LazyVCFGenotypesContext(alleles, chrom, start, b, count));
		break;

	case 1:
		builder.genotypesNoValidation(
			new LazyBCFGenotypesContext(alleles, in.readInt(), b, count));
		break;

	default:
		throw new IOException(
			"Invalid genotypes type identifier: cannot decode");
	}

	return builder.make();
}
 
Example 10
Source File: FilterAlignmentArtifacts.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Override
public void apply(List<VariantContext> variantContexts, ReferenceContext referenceContext, final List<ReadsContext> readsContexts) {


    // for now we do one variant at a time but eventually we will want to combine all reads supporting all variants
    // into a single graph.  This is non-trivial because there may be more than one phasing between variants.
    for (final VariantContext vc : variantContexts) {
        final AssemblyRegion assemblyRegion = makeAssemblyRegionFromVariantReads(readsContexts, vc);


        // TODO: give this tool M2 Assembler args to allow override default M2ArgumentCollection?
        final AssemblyResultSet assemblyResult = AssemblyBasedCallerUtils.assembleReads(assemblyRegion, Collections.emptyList(), MTAC, bamHeader, samplesList, logger, referenceReader, assemblyEngine, ALIGNER, false);
        final AssemblyRegion regionForGenotyping = assemblyResult.getRegionForGenotyping();

        final Map<String,List<GATKRead>> reads = AssemblyBasedCallerUtils.splitReadsBySample(samplesList, bamHeader, regionForGenotyping.getReads());

        final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods = likelihoodCalculationEngine.computeReadLikelihoods(assemblyResult,samplesList,reads);
        readLikelihoods.switchToNaturalLog();
        final Map<GATKRead,GATKRead> readRealignments = AssemblyBasedCallerUtils.realignReadsToTheirBestHaplotype(readLikelihoods, assemblyResult.getReferenceHaplotype(), assemblyResult.getPaddedReferenceLoc(), ALIGNER);
        readLikelihoods.changeEvidence(readRealignments);
        writeBamOutput(assemblyResult, readLikelihoods, new HashSet<>(readLikelihoods.alleles()), regionForGenotyping.getSpan());

        final LocusIteratorByState libs = new LocusIteratorByState(regionForGenotyping.getReads().iterator(), DownsamplingMethod.NONE, false, samplesList.asListOfSamples(), bamHeader, true);

        final List<byte[]> unitigs = getUnitigs(libs);

        final VariantContextBuilder vcb = new VariantContextBuilder(vc)
                .attribute(GATKVCFConstants.UNITIG_SIZES_KEY, unitigs.stream().mapToInt(u -> u.length).toArray());

        final List<List<BwaMemAlignment>> unitigAlignments = unitigs.stream()
                .map(realignmentEngine::realign).collect(Collectors.toList());

        final List<List<BwaMemAlignment>> jointAlignments = RealignmentEngine.findJointAlignments(unitigAlignments, realignmentArgumentCollection.maxReasonableFragmentLength);
        vcb.attribute(GATKVCFConstants.JOINT_ALIGNMENT_COUNT_KEY, jointAlignments.size());
        jointAlignments.sort(Comparator.comparingInt(FilterAlignmentArtifacts::jointAlignmentScore).reversed());

        // best mapping to another contig
        if (!jointAlignments.isEmpty() && jointAlignments.get(0).get(0).getRefId() != getReferenceDictionary().getSequenceIndex(vc.getContig())) {
            vcb.filter(GATKVCFConstants.ALIGNMENT_ARTIFACT_FILTER_NAME);
        } else if (jointAlignments.size() > 1) {

            final int totalBases = unitigs.stream().mapToInt(unitig -> unitig.length).sum();
            final int scoreDiff = jointAlignmentScore(jointAlignments.get(0)) - jointAlignmentScore(jointAlignments.get(1));
            final int mismatchDiff = totalMismatches(jointAlignments.get(1)) - totalMismatches(jointAlignments.get(0));

            vcb.attribute(GATKVCFConstants.ALIGNMENT_SCORE_DIFFERENCE_KEY, scoreDiff);

            final boolean multimapping = (double) scoreDiff / totalBases < realignmentArgumentCollection.minAlignerScoreDifferencePerBase
                    && (double) mismatchDiff / totalBases < realignmentArgumentCollection.minMismatchDifferencePerBase;

            if (multimapping) {
                vcb.filter(GATKVCFConstants.ALIGNMENT_ARTIFACT_FILTER_NAME);
            }
        }

        vcfWriter.add(vcb.make());
    }
}