Java Code Examples for htsjdk.variant.variantcontext.VariantContextBuilder#log10PError()

The following examples show how to use htsjdk.variant.variantcontext.VariantContextBuilder#log10PError(). Each example is taken from an open-source project; the source file and license are noted above the code.
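Before the project examples, here is a minimal, self-contained sketch of the call (the class name, contig, and coordinates are invented for illustration). VCF QUAL is Phred-scaled, QUAL = -10 * log10(P(error)), so the corresponding log10PError is simply -QUAL / 10, and VariantContext.NO_LOG10_PERROR marks the QUAL as missing:

import java.util.Arrays;

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;

public class Log10PErrorSketch {
  public static void main(String[] args) {
    // A single biallelic SNP site at hypothetical coordinates.
    Allele ref = Allele.create("A", true);
    Allele alt = Allele.create("T", false);
    VariantContextBuilder builder = new VariantContextBuilder(
        "sketch", "chr1", 100, 100, Arrays.asList(ref, alt));

    // QUAL = 30 means P(error) = 1e-3, i.e. log10PError = -3.0.
    double qual = 30.0;
    builder.log10PError(-qual / 10);

    // To emit '.' in the QUAL column instead, mark it as missing:
    // builder.log10PError(VariantContext.NO_LOG10_PERROR);

    VariantContext vc = builder.make();
    System.out.println(vc.getPhredScaledQual()); // prints 30.0
  }
}
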
Example 1
Source File: VariantToVcf.java    From genomewarp with Apache License 2.0
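This example converts a protobuf Variant record into an htsjdk VariantContext. The record's Phred-scaled quality is converted to a log10 error probability by negating it and dividing by 10.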
private static VariantContext getContextFromVariant(VCFHeader header, Variant in) {
  // Create allele list
  String refBases = in.getReferenceBases();
  List<String> altBases = in.getAlternateBasesList();
  List<Allele> alleleList = new ArrayList<>();
  alleleList.add(Allele.create(refBases.getBytes(StandardCharsets.UTF_8), true));
  for (String base : altBases) {
    alleleList.add(Allele.create(base.getBytes(StandardCharsets.UTF_8), false));
  }

  // Note htsjdk start/end are both 1-based closed
  VariantContextBuilder builder = new VariantContextBuilder(SOURCE, in.getReferenceName(),
      in.getStart() + 1, in.getEnd(), alleleList);

  // Set quality (QUAL = -10 * log10(P(error)), so log10PError = -QUAL / 10)
  if (in.getQuality() != 0.0) {
    builder.log10PError(-in.getQuality() / 10);
  }

  // Set names (the VCF ID column is a comma-separated list)
  int numNames = in.getNamesCount();
  int i = 0;
  String namesString = "";
  for (String name : in.getNamesList()) {
    // Append all but the last name with a trailing comma
    if (i == numNames - 1) {
      break;
    }
    namesString += name + ",";
    i++;
  }
  if (numNames == 0) {
    builder.noID();
  } else {
    // Append the last name without a trailing comma (fence post)
    builder.id(namesString + in.getNames(numNames - 1));
  }

  // Set filters
  boolean hadFilter = false;
  for (String filter : in.getFilterList()) {
    hadFilter = true;
    if (filter.equals(VCFConstants.PASSES_FILTERS_v4)) {
      builder.passFilters();
      break;
    }
    if (filter.equals(VCFConstants.MISSING_VALUE_v4)) {
      builder.unfiltered();
      break;
    }

    builder.filter(filter);
  }
  if (!hadFilter) {
    builder.unfiltered();
  }

  // Set info
  Map<String, Object> toSet = new HashMap<>();
  for (Map.Entry<String, ListValue> entry : in.getInfo().entrySet()) {
    String currKey = entry.getKey();
    List<Value> currValue = entry.getValue().getValuesList();
    int currSize = currValue.size();

    if (currSize == 0) {
      // If a key is present in the info map, it must have
      // at least one value associated with it
      throw new IllegalStateException(String.format("info field %s has length 0", currKey));
    }

    VCFHeaderLineType type = header.getInfoHeaderLine(currKey).getType();
    if (currSize == 1) {
      toSet.put(currKey, parseObject(currKey, currValue.get(0), type));
      continue;
    }

    List<Object> objectList = new ArrayList<>();
    for (Value val : currValue) {
      objectList.add(parseObject(currKey, val, type));
    }
    toSet.put(currKey, objectList);
  }
  builder.attributes(toSet);

  // Set calls
  List<Genotype> genotypes = new ArrayList<>();
  for (VariantCall vc : in.getCallsList()) {
    genotypes.add(getGenotypeFromVariantCall(header, vc, alleleList));
  }
  if (genotypes.size() == 0) {
    builder.noGenotypes();
  } else {
    builder.genotypes(genotypes);
  }

  return builder.make();
}
 
Example 2
Source File: GtcToVcf.java    From picard with MIT License
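This example builds a VariantContext from an Illumina genotyping-array record. There is no per-site variant quality to report, so it passes VariantContext.NO_LOG10_PERROR, which leaves the QUAL column missing ('.').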
private VariantContext makeVariantContext(Build37ExtendedIlluminaManifestRecord record, final InfiniumGTCRecord gtcRecord,
                                          final InfiniumEGTFile egtFile, final ProgressLogger progressLogger) {
    // If the record is not flagged as errant in the manifest, we include it in the VCF
    Allele A = record.getAlleleA();
    Allele B = record.getAlleleB();
    Allele ref = record.getRefAllele();

    final String chr = record.getB37Chr();
    final Integer position = record.getB37Pos();
    final Integer endPosition = position + ref.length() - 1;
    Integer egtIndex = egtFile.rsNameToIndex.get(record.getName());
    if (egtIndex == null) {
        throw new PicardException("Found no record in cluster file for manifest entry '" + record.getName() + "'");
    }

    progressLogger.record(chr, position);

    // Create list of unique alleles
    final List<Allele> assayAlleles = new ArrayList<>();
    assayAlleles.add(ref);

    if (A.equals(B)) {
        throw new PicardException("Found same allele (" + A.getDisplayString() + ") for A and B ");
    }

    if (!ref.equals(A, true)) {
        assayAlleles.add(A);
    }

    if (!ref.equals(B, true)) {
        assayAlleles.add(B);
    }

    final String sampleName = FilenameUtils.removeExtension(INPUT.getName());
    final Genotype genotype = getGenotype(sampleName, gtcRecord, record, A, B);

    final VariantContextBuilder builder = new VariantContextBuilder();

    builder.source(record.getName());
    builder.chr(chr);
    builder.start(position);
    builder.stop(endPosition);
    builder.alleles(assayAlleles);
    builder.log10PError(VariantContext.NO_LOG10_PERROR);
    builder.id(record.getName());
    builder.genotypes(genotype);

    VariantContextUtils.calculateChromosomeCounts(builder, false);

    //custom info fields
    builder.attribute(InfiniumVcfFields.ALLELE_A, record.getAlleleA());
    builder.attribute(InfiniumVcfFields.ALLELE_B, record.getAlleleB());
    builder.attribute(InfiniumVcfFields.ILLUMINA_STRAND, record.getIlmnStrand());
    builder.attribute(InfiniumVcfFields.PROBE_A, record.getAlleleAProbeSeq());
    builder.attribute(InfiniumVcfFields.PROBE_B, record.getAlleleBProbeSeq());
    builder.attribute(InfiniumVcfFields.BEADSET_ID, record.getBeadSetId());
    builder.attribute(InfiniumVcfFields.ILLUMINA_CHR, record.getChr());
    builder.attribute(InfiniumVcfFields.ILLUMINA_POS, record.getPosition());
    builder.attribute(InfiniumVcfFields.ILLUMINA_BUILD, record.getGenomeBuild());
    builder.attribute(InfiniumVcfFields.SOURCE, record.getSource().replace(' ', '_'));
    builder.attribute(InfiniumVcfFields.GC_SCORE, formatFloatForVcf(egtFile.totalScore[egtIndex]));

    for (InfiniumVcfFields.GENOTYPE_VALUES gtValue : InfiniumVcfFields.GENOTYPE_VALUES.values()) {
        final int ordinalValue = gtValue.ordinal();
        builder.attribute(InfiniumVcfFields.N[ordinalValue], egtFile.n[egtIndex][ordinalValue]);
        builder.attribute(InfiniumVcfFields.DEV_R[ordinalValue], formatFloatForVcf(egtFile.devR[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.MEAN_R[ordinalValue], formatFloatForVcf(egtFile.meanR[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.DEV_THETA[ordinalValue], formatFloatForVcf(egtFile.devTheta[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.MEAN_THETA[ordinalValue], formatFloatForVcf(egtFile.meanTheta[egtIndex][ordinalValue]));

        final EuclideanValues genotypeEuclideanValues = polarToEuclidean(egtFile.meanR[egtIndex][ordinalValue], egtFile.devR[egtIndex][ordinalValue],
                egtFile.meanTheta[egtIndex][ordinalValue], egtFile.devTheta[egtIndex][ordinalValue]);
        builder.attribute(InfiniumVcfFields.DEV_X[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.devX));
        builder.attribute(InfiniumVcfFields.MEAN_X[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.meanX));
        builder.attribute(InfiniumVcfFields.DEV_Y[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.devY));
        builder.attribute(InfiniumVcfFields.MEAN_Y[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.meanY));
    }


    final String rsid = record.getRsId();
    if (StringUtils.isNotEmpty(rsid)) {
        builder.attribute(InfiniumVcfFields.RS_ID, rsid);
    }
    if (egtFile.totalScore[egtIndex] == 0.0) {
        builder.filter(InfiniumVcfFields.ZEROED_OUT_ASSAY);
        if (genotype.isCalled()) {
            if (DO_NOT_ALLOW_CALLS_ON_ZEROED_OUT_ASSAYS) {
                throw new PicardException("Found a call (genotype: " + genotype + ") on a zeroed out Assay!!");
            } else {
                log.warn("Found a call (genotype: " + genotype + ") on a zeroed out Assay. " +
                        "This could occur if you called genotypes on a different cluster file than used here.");
            }
        }
    }
    if (record.isDupe()) {
        builder.filter(InfiniumVcfFields.DUPE);
    }
    return builder.make();
}
 
Example 3
Source File: CombineGenotypingArrayVcfs.java    From picard with MIT License
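This example merges multiple VariantContexts at the same locus into a single record. The combined QUAL is taken from the first context whose log10PError is not the missing-value sentinel.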
/**
 * Merges multiple VariantContexts all for the same locus into a single hybrid.
 *
 * @param variantContexts list of VCs to merge
 * @return a new VariantContext representing the merge of variantContexts
 */
public static VariantContext merge(final List<VariantContext> variantContexts) {
    if ( variantContexts == null || variantContexts.isEmpty() )
        return null;

    // establish the baseline info from the first VC
    final VariantContext first = variantContexts.get(0);
    final String name = first.getSource();

    final Set<String> filters = new HashSet<>();

    int depth = 0;
    double log10PError = CommonInfo.NO_LOG10_PERROR;
    boolean anyVCHadFiltersApplied = false;
    GenotypesContext genotypes = GenotypesContext.create();

    // Go through all the VCs, verify that the loci and ID and other attributes agree.
    final Map<String, Object> firstAttributes = first.getAttributes();
    for (final VariantContext vc : variantContexts ) {
        if ((vc.getStart() != first.getStart()) || (!vc.getContig().equals(first.getContig()))) {
            throw new PicardException("Mismatch in loci among input VCFs");
        }
        if (!vc.getID().equals(first.getID())) {
            throw new PicardException("Mismatch in ID field among input VCFs");
        }
        if (!vc.getReference().equals(first.getReference())) {
            throw new PicardException("Mismatch in REF allele among input VCFs");
        }
        checkThatAllelesMatch(vc, first);

        genotypes.addAll(vc.getGenotypes());

        // We always take the QUAL of the first VC with a non-MISSING qual for the combined value
        if ( log10PError == CommonInfo.NO_LOG10_PERROR )
            log10PError =  vc.getLog10PError();

        filters.addAll(vc.getFilters());
        anyVCHadFiltersApplied |= vc.filtersWereApplied();

        // add attributes
        // special case DP (add it up)
        if (vc.hasAttribute(VCFConstants.DEPTH_KEY))
            depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);

        // Go through all attributes - if any differ (other than AC, AF, AN, devX_AB, or devY_AB), that's an error.  (AC, AF, and AN are recalculated later.)
        for (final Map.Entry<String, Object> p : vc.getAttributes().entrySet()) {
            final String key = p.getKey();
            if ((!key.equals("AC")) && (!key.equals("AF")) && (!key.equals("AN")) && (!key.equals("devX_AB")) && (!key.equals("devY_AB"))) {
                final Object value = p.getValue();
                final Object extantValue = firstAttributes.get(key);
                if (extantValue == null) {
                    // attribute in one VCF but not another.  Die!
                    throw new PicardException("Attribute '" + key + "' not found in all VCFs");
                }
                else if (!extantValue.equals(value)) {
                    // Attribute values disagree between two of the input VCFs.  Die!
                    throw new PicardException("Values for attribute '" + key + "' disagree among input VCFs");
                }
            }
        }
    }

    if (depth > 0)
        firstAttributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));

    final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(first.getID());
    builder.loc(first.getContig(), first.getStart(), first.getEnd());
    builder.alleles(first.getAlleles());
    builder.genotypes(genotypes);

    builder.attributes(new TreeMap<>(firstAttributes));
    // AC, AF, and AN are recalculated here
    VariantContextUtils.calculateChromosomeCounts(builder, false);

    builder.log10PError(log10PError);
    if ( anyVCHadFiltersApplied ) {
        builder.filters(filters.isEmpty() ? filters : new TreeSet<>(filters));
    }

    return builder.make();
}
 
Example 4
Source File: LiftoverUtils.java    From picard with MIT License
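This example lifts a VariantContext onto a target interval, copying the source record's log10PError onto the lifted variant unchanged.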
/**
 * This will take an input VariantContext and lift to the provided interval.
 * If this interval is in the opposite orientation, all alleles and genotypes will be reverse complemented
 * and indels will be left-aligned.  Currently this is only able to invert biallelic indels, and null will be
 * returned for any unsupported VC.
 *
 * @param source                The VariantContext to lift
 * @param target                The target interval
 * @param refSeq                The reference sequence, which should match the target interval
 * @param writeOriginalPosition If true, INFO field annotations will be added to store the original position and contig
 * @param writeOriginalAlleles  If true, an INFO field annotation will be added to store the original alleles in order.  This can be useful in the case of more complex liftovers, like reverse complements, left aligned indels or swapped REF/ALT
 * @return The lifted VariantContext.  This will be null if the input VariantContext could not be lifted.
 */
public static VariantContext liftVariant(final VariantContext source, final Interval target, final ReferenceSequence refSeq, final boolean writeOriginalPosition, final boolean writeOriginalAlleles) {
    if (target == null) {
        return null;
    }

    final VariantContextBuilder builder;
    if (target.isNegativeStrand()) {
        builder = reverseComplementVariantContext(source, target, refSeq);
    } else {
        builder = liftSimpleVariantContext(source, target);
    }

    if (builder == null) {
        return null;
    }

    builder.filters(source.getFilters());
    builder.log10PError(source.getLog10PError());

    // If any of the source alleles is symbolic, do not populate the END tag (protecting things like <NON_REF>
    // from getting screwed up in reverse-complemented variants)
    if (source.hasAttribute(VCFConstants.END_KEY) && builder.getAlleles().stream().noneMatch(Allele::isSymbolic)) {
        builder.attribute(VCFConstants.END_KEY, (int) builder.getStop());
    } else {
        builder.rmAttribute(VCFConstants.END_KEY);
    }

    // make sure that the variant isn't mistakenly set as "SwappedAlleles"
    builder.rmAttribute(SWAPPED_ALLELES);
    if (target.isNegativeStrand()) {
        builder.attribute(REV_COMPED_ALLELES, true);
    } else {
        // make sure that the variant isn't mistakenly set as "ReverseComplementedAlleles" (from a previous liftover, say)
        builder.rmAttribute(REV_COMPED_ALLELES);
    }

    builder.id(source.getID());

    if (writeOriginalPosition) {
        builder.attribute(LiftoverVcf.ORIGINAL_CONTIG, source.getContig());
        builder.attribute(LiftoverVcf.ORIGINAL_START, source.getStart());
    }

    if (writeOriginalAlleles && !source.getAlleles().equals(builder.getAlleles())) {
        builder.attribute(LiftoverVcf.ORIGINAL_ALLELES, allelesToStringList(source.getAlleles()));
    }

    return builder.make();
}
 
Example 5
Source File: VariantContextCodec.java    From Hadoop-BAM with MIT License
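This example deserializes a VariantContext from a custom binary stream. QUAL is read back as the raw bits of a float, with a reserved NaN bit pattern standing in for a missing value.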
public static VariantContext read(final DataInput in) throws IOException {
	final VariantContextBuilder builder = new VariantContextBuilder();

	int count, len;
	byte[] b;

	len = in.readInt();
	b = new byte[len];
	in.readFully(b);
	final String chrom = new String(b, "UTF-8");
	builder.chr(chrom);

	final int start = in.readInt();
	builder.start(start);
	builder.stop (in.readInt());

	len = in.readInt();
	if (len == 0)
		builder.noID();
	else {
		if (len > b.length) b = new byte[len];
		in.readFully(b, 0, len);
		builder.id(new String(b, 0, len, "UTF-8"));
	}

	count = in.readInt();
	final List<Allele> alleles = new ArrayList<Allele>(count);
	for (int i = 0; i < count; ++i) {
		len = in.readInt();
		if (len > b.length) b = new byte[len];
		in.readFully(b, 0, len);
		alleles.add(Allele.create(Arrays.copyOf(b, len), i == 0));
	}
	builder.alleles(alleles);

	// QUAL is stored as raw float bits; the NaN bit pattern 0x7f800001
	// is the sentinel for a missing QUAL (NO_LOG10_PERROR).
	final int qualInt = in.readInt();
	builder.log10PError(
		qualInt == 0x7f800001
			? VariantContext.NO_LOG10_PERROR
			: Float.intBitsToFloat(qualInt));

	count = in.readInt();
	switch (count) {
	case -2: builder.unfiltered(); break;
	case -1: builder.passFilters(); break;
	default:
		while (count-- > 0) {
			len = in.readInt();
			if (len > b.length) b = new byte[len];
			in.readFully(b, 0, len);
			builder.filter(new String(b, 0, len, "UTF-8"));
		}
		break;
	}

	count = in.readInt();
	final Map<String,Object> attrs = new HashMap<String,Object>(count, 1);
	while (count-- > 0) {
		len = in.readInt();
		if (len > b.length) b = new byte[len];
		in.readFully(b, 0, len);
		attrs.put(new String(b, 0, len, "UTF-8"), decodeAttrVal(in));
	}
	builder.attributes(attrs);

	count = in.readInt();
	final byte genoType = in.readByte();
	len = in.readInt();

	// Resize b even if it's already big enough, minimizing the amount of
	// memory LazyGenotypesContext hangs on to.
	b = new byte[len];
	in.readFully(b);

	switch (genoType) {
	case 0:
		builder.genotypesNoValidation(
			new LazyVCFGenotypesContext(alleles, chrom, start, b, count));
		break;

	case 1:
		builder.genotypesNoValidation(
			new LazyBCFGenotypesContext(alleles, in.readInt(), b, count));
		break;

	default:
		throw new IOException(
			"Invalid genotypes type identifier: cannot decode");
	}

	return builder.make();
}