Java Code Examples for htsjdk.variant.variantcontext.VariantContextBuilder#genotypes()

The following examples show how to use htsjdk.variant.variantcontext.VariantContextBuilder#genotypes() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private VariantContext buildVariantContextWithDroppedAnnotationsRemoved(final VariantContext vc) {
    if (infoAnnotationsToDrop.isEmpty() && genotypeAnnotationsToDrop.isEmpty()) {
        return vc;
    }
    final VariantContextBuilder rmAnnotationsBuilder = new VariantContextBuilder(vc);
    for (String infoField : infoAnnotationsToDrop) {
        rmAnnotationsBuilder.rmAttribute(infoField);
    }
    if (!genotypeAnnotationsToDrop.isEmpty()) {
        final ArrayList<Genotype> genotypesToWrite = new ArrayList<>();
        for (Genotype genotype : vc.getGenotypes()) {
            final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).noAttributes();
            final Map<String, Object> attributes = new HashMap<>(genotype.getExtendedAttributes());
            for (String genotypeAnnotation : genotypeAnnotationsToDrop) {
                attributes.remove(genotypeAnnotation);
            }
            genotypeBuilder.attributes(attributes);
            genotypesToWrite.add(genotypeBuilder.make());
        }
        rmAnnotationsBuilder.genotypes(GenotypesContext.create(genotypesToWrite));
    }
    return rmAnnotationsBuilder.make();
}
 
Example 2
Source File: VariantEnricher.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
private void buildVariants(final String sampleId, List<BachelorGermlineVariant> bachRecords)
{
    for (final BachelorGermlineVariant bachRecord : bachRecords)
    {
        VariantContextBuilder builder = new VariantContextBuilder();
        builder.id(bachRecord.VariantId);
        builder.loc(bachRecord.Chromosome, bachRecord.Position, bachRecord.Position + bachRecord.Ref.length() - 1);

        List<Allele> alleles = Lists.newArrayList();
        alleles.add(Allele.create(bachRecord.Ref, true));
        alleles.add(Allele.create(bachRecord.Alts, false));
        builder.alleles(alleles);

        List<Genotype> genoTypes = Lists.newArrayList();

        GenotypeBuilder gBuilder = new GenotypeBuilder(sampleId, builder.getAlleles());
        int[] adCounts = { bachRecord.getTumorRefCount(), bachRecord.getTumorAltCount() };
        gBuilder.AD(adCounts);
        gBuilder.DP(bachRecord.getGermlineReadDepth());
        genoTypes.add(gBuilder.make());

        builder.genotypes(genoTypes);
        VariantContext variantContext = builder.make();

        variantContext.getCommonInfo().addFilter("PASS");
        variantContext.getCommonInfo().putAttribute(SNPEFF_IDENTIFIER, bachRecord.Annotations);

        bachRecord.setVariantContext(variantContext);

        SomaticVariant somVariant = SomaticVariantFactory.unfilteredInstance().createSomaticVariant(sampleId, variantContext);
        bachRecord.setSomaticVariant(somVariant);
    }
}
 
Example 3
Source File: PositiveAllelicDepth.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
private void run() {
    fileWriter.writeHeader(fileReader.getFileHeader());
    for (final VariantContext context : fileReader) {

        final VariantContextBuilder builder = new VariantContextBuilder(context);
        final List<Genotype> genotypes = Lists.newArrayList();

        for (int genotypeIndex = 0; genotypeIndex < context.getGenotypes().size(); genotypeIndex++) {
            Genotype genotype = context.getGenotype(genotypeIndex);

            if (genotype.hasAD() && genotype.hasDP()) {
                int[] ad = genotype.getAD();
                int total = 0;
                boolean updateRecord = false;
                for (int i = 0; i < ad.length; i++) {
                    int adValue = ad[i];
                    if (adValue < 0) {
                        updateRecord = true;
                        ad[i] = Math.abs(adValue);
                    }
                    total += Math.abs(adValue);
                }

                if (updateRecord) {
                    LOGGER.info("Updated entry at {}:{}", context.getContig(), context.getStart());
                    Genotype updated = new GenotypeBuilder(genotype).AD(ad).DP(total).make();
                    genotypes.add(updated);
                }
                else {
                    genotypes.add(genotype);
                }
            }
        }

        builder.genotypes(genotypes);
        fileWriter.add(builder.make());
    }
}
 
Example 4
Source File: CalculateGenotypePosteriors.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public void apply(final VariantContext variant,
                  final ReadsContext readsContext,
                  final ReferenceContext referenceContext,
                  final FeatureContext featureContext) {
    final Collection<VariantContext> vcs = featureContext.getValues(getDrivingVariantsFeatureInput());

    final Collection<VariantContext> otherVCs = featureContext.getValues(supportVariants);

    final int missing = supportVariants.size() - otherVCs.size();

    for ( final VariantContext vc : vcs ) {
        final VariantContext vc_familyPriors;
        final VariantContext vc_bothPriors;

        //do family priors first (if applicable)
        final VariantContextBuilder builder = new VariantContextBuilder(vc);
        //only compute family priors for biallelelic sites
        if (!skipFamilyPriors && vc.isBiallelic()){
            final GenotypesContext gc = famUtils.calculatePosteriorGLs(vc);
            builder.genotypes(gc);
        }
        VariantContextUtils.calculateChromosomeCounts(builder, false);
        vc_familyPriors = builder.make();

        if (!skipPopulationPriors) {
            vc_bothPriors = PosteriorProbabilitiesUtils.calculatePosteriorProbs(vc_familyPriors, otherVCs, missing * numRefIfMissing, globalPrior, !ignoreInputSamples, defaultToAC, useACoff);
        } else {
            final VariantContextBuilder builder2 = new VariantContextBuilder(vc_familyPriors);
            VariantContextUtils.calculateChromosomeCounts(builder, false);
            vc_bothPriors = builder2.make();
        }
        vcfWriter.add(vc_bothPriors);
    }
}
 
Example 5
Source File: SelectVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
 *
 * @param vc       the VariantContext record to subset
 * @param preserveAlleles should we trim constant sequence from the beginning and/or end of all alleles, or preserve it?
 * @param removeUnusedAlternates removes alternate alleles with AC=0
 * @return the subsetted VariantContext
 */
private VariantContext subsetRecord(final VariantContext vc, final boolean preserveAlleles, final boolean removeUnusedAlternates) {
    //subContextFromSamples() always decodes the vc, which is a fairly expensive operation.  Avoid if possible
    if (noSamplesSpecified && !removeUnusedAlternates) {
        return vc;
    }

    // strip out the alternate alleles that aren't being used
    final VariantContext sub = vc.subContextFromSamples(samples, removeUnusedAlternates);

    // If no subsetting happened, exit now
    if (sub.getNSamples() == vc.getNSamples() && sub.getNAlleles() == vc.getNAlleles()) {
        return vc;
    }

    // fix the PL and AD values if sub has fewer alleles than original vc and remove a fraction of the genotypes if needed
    final GenotypesContext oldGs = sub.getGenotypes();
    GenotypesContext newGC = sub.getNAlleles() == vc.getNAlleles() ? oldGs :
            AlleleSubsettingUtils.subsetAlleles(oldGs, 0, vc.getAlleles(), sub.getAlleles(), GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));

    if (fractionGenotypes > 0) {
        final List<Genotype> genotypes = newGC.stream().map(genotype -> randomGenotypes.nextDouble() > fractionGenotypes ? genotype :
                new GenotypeBuilder(genotype).alleles(getNoCallAlleles(genotype.getPloidy())).noGQ().make()).collect(Collectors.toList());
        newGC = GenotypesContext.create(new ArrayList<>(genotypes));
    }

    // since the VC has been subset (either by sample or allele), we need to strip out the MLE tags
    final VariantContextBuilder builder = new VariantContextBuilder(sub);
    builder.rmAttributes(Arrays.asList(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
    builder.genotypes(newGC);
    addAnnotations(builder, vc, sub.getSampleNames());
    final VariantContext subset = builder.make();

    return preserveAlleles? subset : GATKVariantContextUtils.trimAlleles(subset,true,true);
}
 
Example 6
Source File: CalculateGenotypePosteriors.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public void apply(final VariantContext variant,
                  final ReadsContext readsContext,
                  final ReferenceContext referenceContext,
                  final FeatureContext featureContext) {

    final Collection<VariantContext> otherVCs = featureContext.getValues(supportVariants);

    //If no resource contains a matching variant, then add numRefIfMissing as a pseudocount to the priors
    List<VariantContext> resourcesWithMatchingStarts = otherVCs.stream()
            .filter(vc -> variant.getStart() == vc.getStart()).collect(Collectors.toList());

    //do family priors first (if applicable)
    final VariantContextBuilder builder = new VariantContextBuilder(variant);
    //only compute family priors for biallelelic sites
    if (!skipFamilyPriors && variant.isBiallelic()){
        final GenotypesContext gc = famUtils.calculatePosteriorGLs(variant);
        builder.genotypes(gc);
    }
    VariantContextUtils.calculateChromosomeCounts(builder, false);
    final VariantContext vc_familyPriors = builder.make();

    final VariantContext vc_bothPriors;
    if (!skipPopulationPriors) {
        vc_bothPriors = PosteriorProbabilitiesUtils.calculatePosteriorProbs(vc_familyPriors, resourcesWithMatchingStarts,
                resourcesWithMatchingStarts.isEmpty() ? numRefIfMissing : 0, options);
    } else {
        vc_bothPriors = vc_familyPriors;
    }
    vcfWriter.add(vc_bothPriors);
}
 
Example 7
Source File: LiftoverUtils.java    From picard with MIT License 5 votes vote down vote up
protected static VariantContextBuilder reverseComplementVariantContext(final VariantContext source, final Interval target, final ReferenceSequence refSeq) {
    if (target.isPositiveStrand()) {
        throw new IllegalArgumentException("This should only be called for negative strand liftovers");
    }
    final int start = target.getStart();
    final int stop = target.getEnd();

    final List<Allele> origAlleles = new ArrayList<>(source.getAlleles());
    final VariantContextBuilder vcb = new VariantContextBuilder(source);

    vcb.rmAttribute(VCFConstants.END_KEY)
            .chr(target.getContig())
            .start(start)
            .stop(stop)
            .alleles(reverseComplementAlleles(origAlleles));

    if (isIndelForLiftover(source)) {
        // check that the reverse complemented bases match the new reference
        if (referenceAlleleDiffersFromReferenceForIndel(vcb.getAlleles(), refSeq, start, stop)) {
            return null;
        }
        leftAlignVariant(vcb, start, stop, vcb.getAlleles(), refSeq);
    }

    vcb.genotypes(fixGenotypes(source.getGenotypes(), origAlleles, vcb.getAlleles()));

    return vcb;
}
 
Example 8
Source File: LiftoverVcfTest.java    From picard with MIT License 5 votes vote down vote up
@Test(dataProvider = "leftAlignAllelesData")
public void testLeftAlignVariants(final VariantContext source, final ReferenceSequence reference, final VariantContext result) {
    VariantContextBuilder vcb = new VariantContextBuilder(source);

    LiftoverUtils.leftAlignVariant(vcb, source.getStart(), source.getEnd(), source.getAlleles(), reference);
    vcb.genotypes(LiftoverUtils.fixGenotypes(source.getGenotypes(), source.getAlleles(), vcb.getAlleles()));

    VcfTestUtils.assertEquals(vcb.make(), result);
}
 
Example 9
Source File: VcfOutputRenderer.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Override
public void write(final VariantContext variant, final FuncotationMap txToFuncotationMap) {

    // Create a new variant context builder:
    final VariantContextBuilder variantContextOutputBuilder = new VariantContextBuilder(variant);

    final StringBuilder funcotatorAnnotationStringBuilder = new StringBuilder();

    // Get the old VCF Annotation field and append the new information to it:
    final Object existingAnnotation = variant.getAttribute(FUNCOTATOR_VCF_FIELD_NAME, null);
    final List<String> existingAlleleAnnotations;
    if ( existingAnnotation != null) {
        existingAlleleAnnotations = Utils.split(existingAnnotation.toString(), ',');
    }
    else {
        existingAlleleAnnotations = Collections.emptyList();
    }

    // Go through each allele and add it to the writer separately:
    final List<Allele> alternateAlleles = variant.getAlternateAlleles();
    for ( int alleleIndex = 0; alleleIndex < alternateAlleles.size() ; ++alleleIndex ) {

        final Allele altAllele = alternateAlleles.get(alleleIndex);

        if ( alleleIndex < existingAlleleAnnotations.size() ) {
            funcotatorAnnotationStringBuilder.append( existingAlleleAnnotations.get(alleleIndex) );
            funcotatorAnnotationStringBuilder.append(FIELD_DELIMITER);
        }

        for (final String txId : txToFuncotationMap.getTranscriptList()) {
            funcotatorAnnotationStringBuilder.append(START_TRANSCRIPT_DELIMITER);
            final List<Funcotation> funcotations = txToFuncotationMap.get(txId);
            final Funcotation manualAnnotationFuncotation = createManualAnnotationFuncotation(altAllele);

            funcotatorAnnotationStringBuilder.append(
                    Stream.concat(funcotations.stream(), Stream.of(manualAnnotationFuncotation))
                            .filter(f -> f.getAltAllele().equals(altAllele))
                            .filter(f -> f.getFieldNames().size() > 0)
                            .filter(f -> !f.getDataSourceName().equals(FuncotatorConstants.DATASOURCE_NAME_FOR_INPUT_VCFS))
                            .map(VcfOutputRenderer::adjustIndelAlleleInformation)
                            .map(f -> FuncotatorUtils.renderSanitizedFuncotationForVcf(f, finalFuncotationFieldNames))
                            .collect(Collectors.joining(FIELD_DELIMITER))
            );

            funcotatorAnnotationStringBuilder.append(END_TRANSCRIPT_DELIMITER + ALL_TRANSCRIPT_DELIMITER);
        }
        // We have a trailing "#" - we need to remove it:
        funcotatorAnnotationStringBuilder.deleteCharAt(funcotatorAnnotationStringBuilder.length()-1);
        funcotatorAnnotationStringBuilder.append(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
    }

    // We have a trailing "," - we need to remove it:
    funcotatorAnnotationStringBuilder.deleteCharAt(funcotatorAnnotationStringBuilder.length()-1);

    // Add our new annotation:
    variantContextOutputBuilder.attribute(FUNCOTATOR_VCF_FIELD_NAME, funcotatorAnnotationStringBuilder.toString());

    // Add the genotypes from the variant:
    variantContextOutputBuilder.genotypes( variant.getGenotypes() );

    // Render and add our VCF line:
    vcfWriter.add( variantContextOutputBuilder.make() );
}
 
Example 10
Source File: LiftoverVcfTest.java    From picard with MIT License 4 votes vote down vote up
@DataProvider(name = "noCallAndSymbolicData")
public Iterator<Object[]> noCallAndSymbolicData() {

    final VariantContextBuilder builder = new VariantContextBuilder().source("test1").chr("chr1").attribute("TEST_ATTRIBUTE", 1).attribute("TEST_ATTRIBUTE2", "Hi").attribute("TEST_ATTRIBUTE3", new double[]{1.2, 2.1});
    final VariantContextBuilder result_builder = new VariantContextBuilder().source("test1").chr("chr1").attribute("TEST_ATTRIBUTE", 1).attribute("TEST_ATTRIBUTE2", "Hi").attribute("TEST_ATTRIBUTE3", new double[]{1.2, 2.1});
    result_builder.attribute(LiftoverVcf.ORIGINAL_CONTIG, "chr1");
    final GenotypeBuilder genotypeBuilder = new GenotypeBuilder("test1");
    final GenotypeBuilder resultGenotypeBuilder = new GenotypeBuilder("test1");
    final List<Object[]> tests = new ArrayList<>();

    final Allele CRef = Allele.create("C", true);
    final Allele GRef = Allele.create("G", true);
    final Allele T = Allele.create("T", false);
    final Allele A = Allele.create("A", false);
    final Allele DEL = Allele.create("*", false);
    final Allele nonRef = Allele.create("<*>", false);

    final LiftOver liftOver = new LiftOver(TWO_INTERVAL_CHAIN_FILE);
    final LiftOver liftOverRC = new LiftOver(CHAIN_FILE);

    builder.source("test1");
    int start = 10;
    builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T, nonRef));
    result_builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T, nonRef));
    genotypeBuilder.alleles(CollectionUtil.makeList(Allele.create("."), Allele.create(".")));
    resultGenotypeBuilder.alleles(CollectionUtil.makeList(Allele.create("."), Allele.create(".")));
    builder.genotypes(genotypeBuilder.make());
    result_builder.genotypes(resultGenotypeBuilder.make());
    result_builder.attribute(LiftoverVcf.ORIGINAL_START, start);

    tests.add(new Object[]{liftOver, builder.make(), result_builder.make(), false});

    builder.source("test2");
    builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T, DEL));
    result_builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T, DEL));
    result_builder.attribute(LiftoverVcf.ORIGINAL_START, start);
    genotypeBuilder.alleles(CollectionUtil.makeList(T, DEL));
    resultGenotypeBuilder.alleles(CollectionUtil.makeList(T, DEL));
    builder.genotypes(genotypeBuilder.make());
    result_builder.genotypes(resultGenotypeBuilder.make());

    tests.add(new Object[]{liftOver, builder.make(), result_builder.make(), false});

    //reverse complement
    builder.source("test3");
    int offset = 3;
    start = CHAIN_SIZE - offset;
    int liftedStart = 1 + offset;
    builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T, DEL, nonRef));
    result_builder.start(liftedStart).stop(liftedStart).alleles(CollectionUtil.makeList(GRef, A, DEL, nonRef));
    result_builder.attribute(LiftoverVcf.ORIGINAL_START, start);
    result_builder.attribute(LiftoverVcf.ORIGINAL_ALLELES, LiftoverUtils.allelesToStringList(builder.getAlleles()));
    result_builder.attribute(LiftoverUtils.REV_COMPED_ALLELES, true);

    genotypeBuilder.alleles(CollectionUtil.makeList(T, DEL));
    resultGenotypeBuilder.alleles(CollectionUtil.makeList(A, DEL));
    builder.genotypes(genotypeBuilder.make());
    result_builder.genotypes(resultGenotypeBuilder.make());

    tests.add(new Object[]{liftOverRC, builder.make(), result_builder.make(), true});

    builder.source("test4");
    offset = 4;
    start = CHAIN_SIZE - offset;
    liftedStart = 1 + offset;
    builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T, nonRef));
    result_builder.start(liftedStart).stop(liftedStart).alleles(CollectionUtil.makeList(GRef, A, nonRef));
    result_builder.attribute(LiftoverVcf.ORIGINAL_START, start);
    result_builder.attribute(LiftoverVcf.ORIGINAL_ALLELES, LiftoverUtils.allelesToStringList(builder.getAlleles()));
    genotypeBuilder.alleles(CollectionUtil.makeList(T, Allele.NO_CALL));
    resultGenotypeBuilder.alleles(CollectionUtil.makeList(A, Allele.NO_CALL));
    builder.genotypes(genotypeBuilder.make());
    result_builder.genotypes(resultGenotypeBuilder.make());

    tests.add(new Object[]{liftOverRC, builder.make(), result_builder.make(), true});

    return tests.iterator();
}
 
Example 11
Source File: LiftoverVcfTest.java    From picard with MIT License 4 votes vote down vote up
@DataProvider(name = "snpWithChangedRef")
public Iterator<Object[]> snpWithChangedRef() {

    final Allele RefA = Allele.create("A", true);
    final Allele RefC = Allele.create("C", true);

    final Allele A = Allele.create("A", false);
    final Allele C = Allele.create("C", false);

    final List<Object[]> tests = new ArrayList<>();

    final VariantContextBuilder builder = new VariantContextBuilder().source("test1").chr("chr1");
    final GenotypeBuilder genotypeBuilder = new GenotypeBuilder("test1");
    final GenotypeBuilder resultGenotypeBuilder = new GenotypeBuilder("test1");
    final VariantContextBuilder result_builder = new VariantContextBuilder().source("test1").chr("chr1").attribute(LiftoverUtils.SWAPPED_ALLELES, true);

    // simple snp
    int start = 12;
    int stop = 12;
    builder.start(start).stop(stop).alleles(CollectionUtil.makeList(RefA, C));
    result_builder.start(start).stop(stop).alleles(CollectionUtil.makeList(A, RefC));

    genotypeBuilder.alleles(builder.getAlleles()).AD(new int[]{5, 4}).PL(new int[]{40, 0, 60});
    resultGenotypeBuilder.alleles(result_builder.getAlleles()).AD(new int[]{4, 5}).PL(new int[]{60, 0, 40});

    builder.genotypes(genotypeBuilder.make());
    result_builder.genotypes(resultGenotypeBuilder.make());

    tests.add(new Object[]{builder.make(), result_builder.make()});

    builder.start(start).stop(stop).alleles(CollectionUtil.makeList(RefA, C));
    result_builder.start(start).stop(stop).alleles(CollectionUtil.makeList(RefC, A));

    genotypeBuilder.alleles(Arrays.asList(C, C)).AD(new int[]{0, 10}).PL(new int[]{400, 40, 0});
    resultGenotypeBuilder.alleles(Arrays.asList(RefC, RefC)).AD(new int[]{10, 0}).PL(new int[]{0, 40, 400});

    builder.genotypes(genotypeBuilder.make());
    result_builder.genotypes(resultGenotypeBuilder.make());

    tests.add(new Object[]{builder.make(), result_builder.make()});
    return tests.iterator();
}
 
Example 12
Source File: CallingMetricAccumulatorTest.java    From picard with MIT License 4 votes vote down vote up
@DataProvider(name = "getSingletonSampleData")
public Object[][] getSingletonSampleData() {
    final List<Object[]> retval = new ArrayList<>(10);

    final Allele ARef = Allele.create("A", true);
    final Allele G = Allele.create("G", false);
    final Allele C = Allele.create("C", false);

    final GenotypeBuilder genotypeBuilder = new GenotypeBuilder().alleles(CollectionUtil.makeList(ARef, C));
    final VariantContextBuilder builder = new VariantContextBuilder();

    // one het
    final Genotype het = genotypeBuilder.name("het").make();
    builder.chr("1").start(1).stop(1).alleles(CollectionUtil.makeList(ARef, C, G)).genotypes(Collections.singletonList(het));
    retval.add(new Object[]{builder.make(), "het"});

    //a het and a hom ref
    final Genotype homref = genotypeBuilder.name("homref").alleles(CollectionUtil.makeList(ARef)).make();
    builder.genotypes(CollectionUtil.makeList(het, homref));
    retval.add(new Object[]{builder.make(), "het"});

    // a het, a homvar and a homref
    final Genotype homvar = genotypeBuilder.name("homvar").alleles(CollectionUtil.makeList(C)).make();
    builder.genotypes(CollectionUtil.makeList(het, homref, homvar));
    retval.add(new Object[]{builder.make(), null});

    // two hets and a homref
    final Genotype het2 = genotypeBuilder.name("het2").alleles(CollectionUtil.makeList(ARef, G)).make();
    builder.genotypes(CollectionUtil.makeList(het, homref, het2));
    retval.add(new Object[]{builder.make(), null});

    // a homvar
    builder.genotypes(CollectionUtil.makeList(homvar));
    retval.add(new Object[]{builder.make(), null});

    // a homvar, and a homref
    builder.genotypes(CollectionUtil.makeList(homvar, homref));
    retval.add(new Object[]{builder.make(), null});

    // two homrefs
    final Genotype homref2 = genotypeBuilder.name("homref2").alleles(CollectionUtil.makeList(ARef)).make();
    builder.genotypes(CollectionUtil.makeList(homref, homref2));
    retval.add(new Object[]{builder.make(), null});

    return retval.toArray(new Object[retval.size()][]);
}
 
Example 13
Source File: LiftoverUtils.java    From picard with MIT License 4 votes vote down vote up
/**
 * method to swap the reference and alt alleles of a bi-allelic, SNP
 *
 * @param vc                   the {@link VariantContext} (bi-allelic SNP) that needs to have it's REF and ALT alleles swapped.
 * @param annotationsToReverse INFO field annotations (of double value) that will be reversed (x->1-x)
 * @param annotationsToDrop    INFO field annotations that will be dropped from the result since they are invalid when REF and ALT are swapped
 * @return a new {@link VariantContext} with alleles swapped, INFO fields modified and in the genotypes, GT, AD and PL corrected appropriately
 */
public static VariantContext swapRefAlt(final VariantContext vc, final Collection<String> annotationsToReverse, final Collection<String> annotationsToDrop) {

    if (!vc.isBiallelic() || !vc.isSNP()) {
        throw new IllegalArgumentException("swapRefAlt can only process biallelic, SNPS, found " + vc.toString());
    }

    final VariantContextBuilder swappedBuilder = new VariantContextBuilder(vc);

    swappedBuilder.attribute(SWAPPED_ALLELES, true);

    // Use getBaseString() (rather than the Allele itself) in order to create new Alleles with swapped
    // reference and non-variant attributes
    swappedBuilder.alleles(Arrays.asList(vc.getAlleles().get(1).getBaseString(), vc.getAlleles().get(0).getBaseString()));

    final Map<Allele, Allele> alleleMap = new HashMap<>();

    // A mapping from the old allele to the new allele, to be used when fixing the genotypes
    alleleMap.put(vc.getAlleles().get(0), swappedBuilder.getAlleles().get(1));
    alleleMap.put(vc.getAlleles().get(1), swappedBuilder.getAlleles().get(0));

    final GenotypesContext swappedGenotypes = GenotypesContext.create(vc.getGenotypes().size());
    for (final Genotype genotype : vc.getGenotypes()) {
        final List<Allele> swappedAlleles = new ArrayList<>();
        for (final Allele allele : genotype.getAlleles()) {
            if (allele.isNoCall()) {
                swappedAlleles.add(allele);
            } else {
                swappedAlleles.add(alleleMap.get(allele));
            }
        }
        // Flip AD
        final GenotypeBuilder builder = new GenotypeBuilder(genotype).alleles(swappedAlleles);
        if (genotype.hasAD() && genotype.getAD().length == 2) {
            final int[] ad = ArrayUtils.clone(genotype.getAD());
            ArrayUtils.reverse(ad);
            builder.AD(ad);
        } else {
            builder.noAD();
        }

        //Flip PL
        if (genotype.hasPL() && genotype.getPL().length == 3) {
            final int[] pl = ArrayUtils.clone(genotype.getPL());
            ArrayUtils.reverse(pl);
            builder.PL(pl);
        } else {
            builder.noPL();
        }
        swappedGenotypes.add(builder.make());
    }
    swappedBuilder.genotypes(swappedGenotypes);

    for (final String key : vc.getAttributes().keySet()) {
        if (annotationsToDrop.contains(key)) {
            swappedBuilder.rmAttribute(key);
        } else if (annotationsToReverse.contains(key) && !vc.getAttributeAsString(key, "").equals(VCFConstants.MISSING_VALUE_v4)) {
            final double attributeToReverse = vc.getAttributeAsDouble(key, -1);

            if (attributeToReverse < 0 || attributeToReverse > 1) {
                log.warn("Trying to reverse attribute " + key +
                        " but found value that isn't between 0 and 1: (" + attributeToReverse + ") in variant " + vc + ". Results might be wrong.");
            }
            swappedBuilder.attribute(key, 1 - attributeToReverse);
        }
    }

    return swappedBuilder.make();
}
 
Example 14
Source File: CombineGenotypingArrayVcfs.java    From picard with MIT License 4 votes vote down vote up
/**
 * Merges multiple VariantContexts all for the same locus into a single hybrid.
 *
 * @param variantContexts           list of VCs
 * @return new VariantContext       representing the merge of variantContexts
 */
public static VariantContext merge(final List<VariantContext> variantContexts) {
    if ( variantContexts == null || variantContexts.isEmpty() )
        return null;

    // establish the baseline info from the first VC
    final VariantContext first = variantContexts.get(0);
    final String name = first.getSource();

    final Set<String> filters = new HashSet<>();

    int depth = 0;
    double log10PError = CommonInfo.NO_LOG10_PERROR;
    boolean anyVCHadFiltersApplied = false;
    GenotypesContext genotypes = GenotypesContext.create();

    // Go through all the VCs, verify that the loci and ID and other attributes agree.
    final Map<String, Object> firstAttributes = first.getAttributes();
    for (final VariantContext vc : variantContexts ) {
        if ((vc.getStart() != first.getStart()) || (!vc.getContig().equals(first.getContig()))) {
            throw new PicardException("Mismatch in loci among input VCFs");
        }
        if (!vc.getID().equals(first.getID())) {
            throw new PicardException("Mismatch in ID field among input VCFs");
        }
        if (!vc.getReference().equals(first.getReference())) {
            throw new PicardException("Mismatch in REF allele among input VCFs");
        }
        checkThatAllelesMatch(vc, first);

        genotypes.addAll(vc.getGenotypes());

        // We always take the QUAL of the first VC with a non-MISSING qual for the combined value
        if ( log10PError == CommonInfo.NO_LOG10_PERROR )
            log10PError =  vc.getLog10PError();

        filters.addAll(vc.getFilters());
        anyVCHadFiltersApplied |= vc.filtersWereApplied();

        // add attributes
        // special case DP (add it up)
        if (vc.hasAttribute(VCFConstants.DEPTH_KEY))
            depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);

        // Go through all attributes - if any differ (other than AC, AF, or AN) that's an error.  (recalc AC, AF, and AN later)
        for (final Map.Entry<String, Object> p : vc.getAttributes().entrySet()) {
            final String key = p.getKey();
            if ((!key.equals("AC")) && (!key.equals("AF")) && (!key.equals("AN")) && (!key.equals("devX_AB")) && (!key.equals("devY_AB"))) {
                final Object value = p.getValue();
                final Object extantValue = firstAttributes.get(key);
                if (extantValue == null) {
                    // attribute in one VCF but not another.  Die!
                    throw new PicardException("Attribute '" + key + "' not found in all VCFs");
                }
                else if (!extantValue.equals(value)) {
                    // Attribute disagrees in value between one VCF Die! (if not AC, AF, nor AN, skipped above)
                    throw new PicardException("Values for attribute '" + key + "' disagrees among input VCFs");
                }
            }
        }
    }

    if (depth > 0)
        firstAttributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));

    final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(first.getID());
    builder.loc(first.getContig(), first.getStart(), first.getEnd());
    builder.alleles(first.getAlleles());
    builder.genotypes(genotypes);

    builder.attributes(new TreeMap<>(firstAttributes));
    // AC, AF, and AN are recalculated here
    VariantContextUtils.calculateChromosomeCounts(builder, false);

    builder.log10PError(log10PError);
    if ( anyVCHadFiltersApplied ) {
        builder.filters(filters.isEmpty() ? filters : new TreeSet<>(filters));
    }

    return builder.make();
}
 
Example 15
Source File: MergePedIntoVcf.java    From picard with MIT License 4 votes vote down vote up
/**
 * Writes out the VariantContext objects in the order presented to the supplied output file
 * in VCF format.
 */
private void writeVcf(final CloseableIterator<VariantContext> variants,
                      final File output,
                      final SAMSequenceDictionary dict,
                      final VCFHeader vcfHeader, ZCallPedFile zCallPedFile) {

    try(VariantContextWriter writer = new VariantContextWriterBuilder()
            .setOutputFile(output)
            .setReferenceDictionary(dict)
            .setOptions(VariantContextWriterBuilder.DEFAULT_OPTIONS)
            .build()) {
        writer.writeHeader(vcfHeader);
        while (variants.hasNext()) {
            final VariantContext context = variants.next();

            final VariantContextBuilder builder = new VariantContextBuilder(context);
            if (zCallThresholds.containsKey(context.getID())) {
                final String[] zThresh = zCallThresholds.get(context.getID());
                builder.attribute(InfiniumVcfFields.ZTHRESH_X, zThresh[0]);
                builder.attribute(InfiniumVcfFields.ZTHRESH_Y, zThresh[1]);
            }
            final Genotype originalGenotype = context.getGenotype(0);
            final Map<String, Object> newAttributes = originalGenotype.getExtendedAttributes();
            final VCFEncoder vcfEncoder = new VCFEncoder(vcfHeader, false, false);
            final Map<Allele, String> alleleMap = vcfEncoder.buildAlleleStrings(context);

            final String zCallAlleles = zCallPedFile.getAlleles(context.getID());
            if (zCallAlleles == null) {
                throw new PicardException("No zCall alleles found for snp " + context.getID());
            }
            final List<Allele> zCallPedFileAlleles = buildNewAllelesFromZCall(zCallAlleles, context.getAttributes());
            newAttributes.put(InfiniumVcfFields.GTA, alleleMap.get(originalGenotype.getAllele(0)) + UNPHASED + alleleMap.get(originalGenotype.getAllele(1)));
            newAttributes.put(InfiniumVcfFields.GTZ, alleleMap.get(zCallPedFileAlleles.get(0)) + UNPHASED + alleleMap.get(zCallPedFileAlleles.get(1)));

            final Genotype newGenotype = GenotypeBuilder.create(originalGenotype.getSampleName(), zCallPedFileAlleles,
                    newAttributes);
            builder.genotypes(newGenotype);
            logger.record("0", 0);
            // AC, AF, and AN are recalculated here
            VariantContextUtils.calculateChromosomeCounts(builder, false);
            final VariantContext newContext = builder.make();
            writer.add(newContext);
        }
    }
}
 
Example 16
Source File: GtcToVcf.java    From picard with MIT License 4 votes vote down vote up
private VariantContext makeVariantContext(Build37ExtendedIlluminaManifestRecord record, final InfiniumGTCRecord gtcRecord,
                                          final InfiniumEGTFile egtFile, final ProgressLogger progressLogger) {
    // If the record is not flagged as errant in the manifest we include it in the VCF
    Allele A = record.getAlleleA();
    Allele B = record.getAlleleB();
    Allele ref = record.getRefAllele();

    final String chr = record.getB37Chr();
    final Integer position = record.getB37Pos();
    final Integer endPosition = position + ref.length() - 1;
    Integer egtIndex = egtFile.rsNameToIndex.get(record.getName());
    if (egtIndex == null) {
        throw new PicardException("Found no record in cluster file for manifest entry '" + record.getName() + "'");
    }

    progressLogger.record(chr, position);

    // Create list of unique alleles
    final List<Allele> assayAlleles = new ArrayList<>();
    assayAlleles.add(ref);

    if (A.equals(B)) {
        throw new PicardException("Found same allele (" + A.getDisplayString() + ") for A and B ");
    }

    if (!ref.equals(A, true)) {
        assayAlleles.add(A);
    }

    if (!ref.equals(B, true)) {
        assayAlleles.add(B);
    }

    final String sampleName = FilenameUtils.removeExtension(INPUT.getName());
    final Genotype genotype = getGenotype(sampleName, gtcRecord, record, A, B);

    final VariantContextBuilder builder = new VariantContextBuilder();

    builder.source(record.getName());
    builder.chr(chr);
    builder.start(position);
    builder.stop(endPosition);
    builder.alleles(assayAlleles);
    builder.log10PError(VariantContext.NO_LOG10_PERROR);
    builder.id(record.getName());
    builder.genotypes(genotype);

    VariantContextUtils.calculateChromosomeCounts(builder, false);

    //custom info fields
    builder.attribute(InfiniumVcfFields.ALLELE_A, record.getAlleleA());
    builder.attribute(InfiniumVcfFields.ALLELE_B, record.getAlleleB());
    builder.attribute(InfiniumVcfFields.ILLUMINA_STRAND, record.getIlmnStrand());
    builder.attribute(InfiniumVcfFields.PROBE_A, record.getAlleleAProbeSeq());
    builder.attribute(InfiniumVcfFields.PROBE_B, record.getAlleleBProbeSeq());
    builder.attribute(InfiniumVcfFields.BEADSET_ID, record.getBeadSetId());
    builder.attribute(InfiniumVcfFields.ILLUMINA_CHR, record.getChr());
    builder.attribute(InfiniumVcfFields.ILLUMINA_POS, record.getPosition());
    builder.attribute(InfiniumVcfFields.ILLUMINA_BUILD, record.getGenomeBuild());
    builder.attribute(InfiniumVcfFields.SOURCE, record.getSource().replace(' ', '_'));
    builder.attribute(InfiniumVcfFields.GC_SCORE, formatFloatForVcf(egtFile.totalScore[egtIndex]));

    for (InfiniumVcfFields.GENOTYPE_VALUES gtValue : InfiniumVcfFields.GENOTYPE_VALUES.values()) {
        final int ordinalValue = gtValue.ordinal();
        builder.attribute(InfiniumVcfFields.N[ordinalValue], egtFile.n[egtIndex][ordinalValue]);
        builder.attribute(InfiniumVcfFields.DEV_R[ordinalValue], formatFloatForVcf(egtFile.devR[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.MEAN_R[ordinalValue], formatFloatForVcf(egtFile.meanR[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.DEV_THETA[ordinalValue], formatFloatForVcf(egtFile.devTheta[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.MEAN_THETA[ordinalValue], formatFloatForVcf(egtFile.meanTheta[egtIndex][ordinalValue]));

        final EuclideanValues genotypeEuclideanValues = polarToEuclidean(egtFile.meanR[egtIndex][ordinalValue], egtFile.devR[egtIndex][ordinalValue],
                egtFile.meanTheta[egtIndex][ordinalValue], egtFile.devTheta[egtIndex][ordinalValue]);
        builder.attribute(InfiniumVcfFields.DEV_X[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.devX));
        builder.attribute(InfiniumVcfFields.MEAN_X[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.meanX));
        builder.attribute(InfiniumVcfFields.DEV_Y[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.devY));
        builder.attribute(InfiniumVcfFields.MEAN_Y[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.meanY));
    }


    final String rsid = record.getRsId();
    if (StringUtils.isNotEmpty(rsid)) {
        builder.attribute(InfiniumVcfFields.RS_ID, rsid);
    }
    if (egtFile.totalScore[egtIndex] == 0.0) {
        builder.filter(InfiniumVcfFields.ZEROED_OUT_ASSAY);
        if (genotype.isCalled()) {
            if (DO_NOT_ALLOW_CALLS_ON_ZEROED_OUT_ASSAYS) {
                throw new PicardException("Found a call (genotype: " + genotype + ") on a zeroed out Assay!!");
            } else {
                log.warn("Found a call (genotype: " + genotype + ") on a zeroed out Assay. " +
                        "This could occur if you called genotypes on a different cluster file than used here.");
            }
        }
    }
    if (record.isDupe()) {
        builder.filter(InfiniumVcfFields.DUPE);
    }
    return builder.make();
}
 
Example 17
Source File: VariantToVcf.java    From genomewarp with Apache License 2.0 4 votes vote down vote up
private static VariantContext getContextFromVariant(VCFHeader header, Variant in) {
  // Create allele list
  String refBases = in.getReferenceBases();
  List<String> altBases = in.getAlternateBasesList();
  List<Allele> alleleList = new ArrayList<>();
  alleleList.add(Allele.create(refBases.getBytes(StandardCharsets.UTF_8), true));
  for (String base : altBases) {
    alleleList.add(Allele.create(base.getBytes(StandardCharsets.UTF_8), false));
  }

  // Note htsjdk start/end are both 1-based closed
  VariantContextBuilder builder = new VariantContextBuilder(SOURCE, in.getReferenceName(),
      in.getStart() + 1, in.getEnd(), alleleList);

  // Set Quality
  if (in.getQuality() != 0.0) {
    builder.log10PError(-in.getQuality() / 10);
  }

  // Set names
  int numNames = in.getNamesCount();
  int i = 0;
  String namesString = "";
  for (String name : in.getNamesList()) {
    if (i == numNames - 1) {
      break;
    }
    namesString += name + ",";
  }
  if (numNames == 0) {
    builder.noID();
  } else {
    // Also handle fence post
    builder.id(namesString + in.getNames(numNames - 1));
  }

  // Set filters
  boolean hadFilter = false;
  for (String filter : in.getFilterList()) {
    hadFilter = true;
    if (filter.equals(VCFConstants.PASSES_FILTERS_v4)) {
      builder.passFilters();
      break;
    }
    if (filter.equals(VCFConstants.MISSING_VALUE_v4)) {
      builder.unfiltered();
      break;
    }

    builder.filter(filter);
  }
  if (!hadFilter) {
    builder.unfiltered();
  }

  // Set info
  Map<String, Object> toSet = new HashMap<>();
  for (Map.Entry<String, ListValue> entry : in.getInfo().entrySet()) {
    String currKey = entry.getKey();
    List<Value> currValue = entry.getValue().getValuesList();
    int currSize = currValue.size();

    if (currSize == 0) {
      // If the key is present in the info map, there must
      // be non-zero attributes associated with it
      throw new IllegalStateException(String.format("info field %s has length 0", currKey));
    }

    VCFHeaderLineType type = header.getInfoHeaderLine(currKey).getType();
    if (currSize == 1) {
      toSet.put(currKey, parseObject(currKey, currValue.get(0), type));
      continue;
    }

    List<Object> objectList = new ArrayList<>();
    for (Value val : currValue) {
      objectList.add(parseObject(currKey, val, type));
    }
    toSet.put(currKey, objectList);
  }
  builder.attributes(toSet);

  // Set calls
  List<Genotype> genotypes = new ArrayList<>();
  for (VariantCall vc : in.getCallsList()) {
    genotypes.add(getGenotypeFromVariantCall(header, vc, alleleList));
  }
  if (genotypes.size() == 0) {
    builder.noGenotypes();
  } else {
    builder.genotypes(genotypes);
  }

  return builder.make();
}