htsjdk.variant.vcf.VCFInfoHeaderLine Java Examples

The following examples show how to use htsjdk.variant.vcf.VCFInfoHeaderLine. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: StrelkaPostProcessApplication.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds the header of the post-processed Strelka VCF.
 *
 * The result starts from the meta-data lines of {@code header} (in input order), is restricted to the single
 * sample {@code sampleName}, and gains the GT/AD FORMAT lines, a compatibility marker line and the three
 * INFO lines written by this application.
 *
 * @param header     header whose meta-data lines are carried over
 * @param sampleName name of the only sample column in the output
 * @return the newly created output header
 */
@NotNull
public static VCFHeader generateOutputHeader(@NotNull final VCFHeader header, @NotNull final String sampleName) {
    final VCFHeader result = new VCFHeader(header.getMetaDataInInputOrder(), Sets.newHashSet(sampleName));

    // Standard FORMAT definitions for the genotype fields.
    result.addMetaDataLine(VCFStandardHeaderLines.getFormatLine("GT"));
    result.addMetaDataLine(VCFStandardHeaderLines.getFormatLine("AD"));

    // Free-form marker recording that the file was altered for GATK compatibility.
    final VCFHeaderLine compatibilityLine =
            new VCFHeaderLine("StrelkaGATKCompatibility", "Added GT fields to strelka calls for gatk compatibility.");
    result.addMetaDataLine(compatibilityLine);

    final VCFInfoHeaderLine mappabilityLine =
            new VCFInfoHeaderLine("MAPPABILITY", 1, VCFHeaderLineType.Float, "Mappability (percentage)");
    final VCFInfoHeaderLine somaticPonLine = new VCFInfoHeaderLine(
            "SOMATIC_PON_COUNT", 1, VCFHeaderLineType.Integer, "Number of times the variant appears in the somatic PON");
    final VCFInfoHeaderLine germlinePonLine = new VCFInfoHeaderLine(
            "GERMLINE_PON_COUNT", 1, VCFHeaderLineType.Integer, "Number of times the variant appears in the germline PON");

    result.addMetaDataLine(mappabilityLine);
    result.addMetaDataLine(somaticPonLine);
    result.addMetaDataLine(germlinePonLine);
    return result;
}
 
Example #2
Source File: ReferenceConfidenceVariantContextMerger.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Parses the string form of a numeric INFO attribute into an {@code Integer} or a {@code Double}.
 *
 * When the header declares the attribute, its declared type decides the parse: Integer yields an
 * {@code Integer}, Float yields a {@code Double}, and any other type is rejected. When the header has no line
 * for the key, a warning is logged and the type is guessed from the text: a value containing '.' is parsed
 * as a double, anything else as an int.
 *
 * @param vcfHeader   header consulted for the INFO line of {@code key}
 * @param key         INFO attribute key
 * @param stringValue textual value to parse
 * @return the parsed {@code Integer} or {@code Double}
 * @throws NumberFormatException if the declared type is not numeric, or the text cannot be parsed
 */
private Comparable<?> parseNumericInfoAttributeValue(final VCFHeader vcfHeader, final String key, final String stringValue) {
    final VCFInfoHeaderLine declaredLine = vcfHeader.getInfoHeaderLine(key);

    if (declaredLine != null) {
        // The header is authoritative about the numeric type.
        switch (declaredLine.getType()) {
            case Integer:
                return Integer.parseInt(stringValue);
            case Float:
                return Double.parseDouble(stringValue);
            default:
                final String message = String.format(
                        "The VCF header specifies type %s type for INFO attribute key %s, but a numeric value is required",
                        declaredLine.getType().name(),
                        key);
                throw new NumberFormatException(message);
        }
    }

    // No header line: warn and infer the type from the shape of the text.
    oneShotHeaderLineLogger.warn(String.format("At least one attribute was found (%s) for which there is no corresponding header line", key));
    if (!stringValue.contains(".")) {
        return Integer.parseInt(stringValue);
    }
    return Double.parseDouble(stringValue);
}
 
Example #3
Source File: FilterVariantTranches.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Assembles the output header from the input header and writes it to {@code vcfWriter}.
 *
 * The input header must already declare an INFO line whose ID equals {@code infoKey}; otherwise a
 * {@link UserException} is thrown. Existing FILTER lines are dropped when {@code removeOldFilters} is set,
 * then the SNP and INDEL tranche fields and the default tool header lines are appended.
 *
 * @param vcfWriter writer that receives the assembled header
 */
private void writeVCFHeader(VariantContextWriter vcfWriter) {
    final VCFHeader sourceHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(sourceHeader.getMetaDataInSortedOrder());

    // The scores being filtered on must be declared as an INFO field in the input.
    boolean infoKeyDeclared = false;
    for (final VCFHeaderLine line : headerLines) {
        if (line instanceof VCFInfoHeaderLine && ((VCFInfoHeaderLine) line).getID().equals(infoKey)) {
            infoKeyDeclared = true;
            break;
        }
    }
    if (!infoKeyDeclared) {
        throw new UserException(String.format("Input VCF does not contain a header line for specified info key:%s", infoKey));
    }

    if (removeOldFilters) {
        headerLines.removeIf(line -> line instanceof VCFFilterHeaderLine);
    }

    addTrancheHeaderFields(SNPString, snpTranches, headerLines);
    addTrancheHeaderFields(INDELString, indelTranches, headerLines);
    headerLines.addAll(getDefaultToolVCFHeaderLines());

    // Sample names are written in sorted order.
    final TreeSet<String> sortedSamples = new TreeSet<>();
    sortedSamples.addAll(sourceHeader.getGenotypeSamples());

    vcfWriter.writeHeader(new VCFHeader(headerLines, sortedSamples));
}
 
Example #4
Source File: AnnotateVcfWithExpectedAlleleFraction.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Writes the output VCF header and loads the per-sample mixing fractions before traversal begins.
 *
 * The output header consists of the input header lines, the INFO line for the expected allele fraction
 * and the default tool header lines. The mixing fractions read from {@code inputMixingFractions} are
 * stored in the order of the input header's samples.
 */
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();

    // Collect every header line BEFORE constructing the VCFHeader. htsjdk's VCFHeader copies the lines it
    // is given, so lines added to this set after construction would never reach the written header
    // (previously the default tool lines were added too late and silently dropped).
    final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
    headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam"));
    headerLines.addAll(getDefaultToolVCFHeaderLines());

    final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(outputVcf);
    vcfWriter.writeHeader(vcfHeader);

    // Index the mixing fractions by sample name, then lay them out in header sample order.
    final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions);
    final Map<String, Double> mixingfractionsMap = mixingFractionsList.stream()
            .collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction));
    mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream()
            .mapToDouble(mixingfractionsMap::get).toArray();
}
 
Example #5
Source File: FilterFuncotations.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Prepares the output writer and the filters before traversal begins.
 *
 * The Funcotator INFO header line must be present in the input header: its description is used to extract
 * the funcotation keys. The input header is then extended with the "not clinically significant" FILTER
 * line and the clinical-significance INFO line, and written to the output.
 *
 * @throws UserException.BadInput if the input header has no Funcotator INFO line
 */
@Override
public void onTraversalStart() {
    final VCFHeader header = getHeaderForVariants();
    final VCFInfoHeaderLine funcotatorLine = header.getInfoHeaderLine(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME);

    // Without the Funcotator header line there is nothing to derive the keys from.
    if (funcotatorLine == null) {
        throw new UserException.BadInput("Could not extract Funcotation keys from " +
                VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME + " field in input VCF header.");
    }

    funcotationKeys = FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(funcotatorLine.getDescription());
    outputVcfWriter = createVCFWriter(outputFile);

    final VCFFilterHeaderLine notClinSigFilterLine = new VCFFilterHeaderLine(
            FilterFuncotationsConstants.NOT_CLINSIG_FILTER,
            FilterFuncotationsConstants.NOT_CLINSIG_FILTER_DESCRIPTION);
    final VCFInfoHeaderLine clinSigInfoLine = new VCFInfoHeaderLine(
            FilterFuncotationsConstants.CLINSIG_INFO_KEY,
            1,
            VCFHeaderLineType.String,
            FilterFuncotationsConstants.CLINSIG_INFO_KEY_DESCRIPTION);
    header.addMetaDataLine(notClinSigFilterLine);
    header.addMetaDataLine(clinSigInfoLine);
    outputVcfWriter.writeHeader(header);

    registerFilters();
}
 
Example #6
Source File: AnnotateVcfWithExpectedAlleleFraction.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Writes the output VCF header and loads the per-sample mixing fractions before traversal begins.
 *
 * The output header consists of the input header lines, the INFO line for the expected allele fraction
 * and the default tool header lines. The mixing fractions read from {@code inputMixingFractions} are
 * stored in the order of the input header's samples.
 */
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();

    // Collect every header line BEFORE constructing the VCFHeader. htsjdk's VCFHeader copies the lines it
    // is given, so lines added to this set after construction would never reach the written header
    // (previously the default tool lines were added too late and silently dropped).
    final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
    headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam"));
    headerLines.addAll(getDefaultToolVCFHeaderLines());

    final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(outputVcf);
    vcfWriter.writeHeader(vcfHeader);

    // Index the mixing fractions by sample name, then lay them out in header sample order.
    final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions);
    final Map<String, Double> mixingfractionsMap = mixingFractionsList.stream()
            .collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction));
    mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream()
            .mapToDouble(mixingfractionsMap::get).toArray();
}
 
Example #7
Source File: FuncotatorUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Creates funcotations for the given variant context after checking it against the metadata.
 *
 * Every attribute key of the variant context must match the ID of a header line in the metadata; the
 * metadata may describe additional fields. Once that check passes, the work is delegated to
 * {@code createFuncotationsFromMetadata}.
 *
 * @param vc The variant context to derive funcotations from.  Never {@code null}
 * @param metadata Metadata that must cover every attribute of the variant context.  Never {@code null}
 * @param datasourceName Name to use as the datasource in the funcotations.  Never {@code null}
 * @return The funcotations produced by {@code createFuncotationsFromMetadata} for these arguments.
 * @throws UserException.MalformedFile if the variant context has attributes that the metadata does not describe
 */
public static List<Funcotation> createFuncotations(final VariantContext vc, final FuncotationMetadata metadata, final String datasourceName) {

    Utils.nonNull(vc);
    Utils.nonNull(metadata);
    Utils.nonNull(datasourceName);

    final List<String> metadataFieldNames = metadata.retrieveAllHeaderInfo().stream()
            .map(VCFInfoHeaderLine::getID)
            .collect(Collectors.toList());

    // Attributes present on the variant but unknown to the metadata are an input error.
    final Set<String> undescribedAttributes = vc.getAttributes().keySet().stream()
            .filter(attributeName -> !metadataFieldNames.contains(attributeName))
            .collect(Collectors.toSet());
    if (!undescribedAttributes.isEmpty()) {
        final String undescribedList = String.join(", ", undescribedAttributes);
        throw new UserException.MalformedFile("Not all attributes in the variant context appear in the metadata: " + undescribedList + " .... Please add these attributes to the input metadata (e.g. VCF Header).");
    }

    return createFuncotationsFromMetadata(vc, metadata, datasourceName);
}
 
Example #8
Source File: FuncotatorUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Creates one funcotation per alternate allele, with one field per header line in the metadata.
 *
 * @param vc Variant context supplying the attribute values and alternate alleles.  Never {@code null}
 * @param metadata Fields that should be present in the funcotations.  Can be a superset of the attributes
 *                 in the variant context.  Never {@code null}
 * @param datasourceName Name to appear in all funcotations.  Never {@code null}
 * @return One {@link Funcotation} per alternate allele in the variant context.  A field that is missing from
 * the variant context attributes will have the empty string ("") as its value.  Field names and values are
 * the same for every alternate allele.
 */
static List<Funcotation> createFuncotationsFromMetadata(final VariantContext vc, final FuncotationMetadata metadata, final String datasourceName) {

    Utils.nonNull(vc);
    Utils.nonNull(metadata);
    Utils.nonNull(datasourceName);

    final List<String> fieldNames = metadata.retrieveAllHeaderInfo().stream()
            .map(VCFInfoHeaderLine::getID)
            .collect(Collectors.toList());

    final List<Funcotation> funcotations = new ArrayList<>();
    for (final Allele altAllele : vc.getAlternateAlleles()) {

        // Every metadata field gets a value; absent attributes fall back to "".
        final List<String> fieldValues = new ArrayList<>();
        fieldNames.forEach(fieldName -> fieldValues.add(vc.getAttributeAsString(fieldName, "")));

        funcotations.add(TableFuncotation.create(fieldNames, fieldValues, altAllele, datasourceName, metadata));
    }

    return funcotations;
}
 
Example #9
Source File: ReblockGVCFIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Runs the tool on a header-only GVCF and checks the INFO header lines of the output: the deprecated raw
 * mapping-quality line must be of type Float, and both deprecated lines must mention "deprecated" in
 * their description.
 */
@Test
public void testMQHeadersAreUpdated() throws Exception {
    final File output = createTempFile("reblockedgvcf", ".vcf");
    final ArgumentsBuilder args = new ArgumentsBuilder();
    args.add("V", getToolTestDataDir() + "justHeader.g.vcf")
            .addOutput(output);
    runCommandLine(args);

    final VCFHeader header = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()).getLeft();

    // Inspect every INFO line, since an old and an updated line could both be present.
    for (final VCFInfoHeaderLine infoLine : new ArrayList<>(header.getInfoHeaderLines())) {
        final String id = infoLine.getID();
        final boolean isDeprecatedRawMQ = id.equals(GATKVCFConstants.RAW_RMS_MAPPING_QUALITY_DEPRECATED);
        final boolean isDeprecatedMQDepth = !isDeprecatedRawMQ && id.equals(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED);

        if (isDeprecatedRawMQ) {
            Assert.assertTrue(infoLine.getType().equals(VCFHeaderLineType.Float));
        }
        if (isDeprecatedRawMQ || isDeprecatedMQDepth) {
            Assert.assertTrue(infoLine.getDescription().contains("deprecated"));
        }
    }
}
 
Example #10
Source File: FuncotatorIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Asserts that two VCF files carry the same Funcotator INFO header line and equal variants.
 *
 * @param outputFile       VCF file produced by the tool under test
 * @param eColiExpectedOut path of the VCF file holding the expected output
 */
private void assertEqualVariantFiles(final File outputFile, final String eColiExpectedOut) {
    final String funcotatorKey = VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME;

    // Actual data first, then the expected data.
    final Pair<VCFHeader, List<VariantContext>> actual =
            VariantContextTestUtils.readEntireVCFIntoMemory(outputFile.getAbsolutePath());
    final VCFInfoHeaderLine actualFuncotatorLine = actual.getLeft().getInfoHeaderLine(funcotatorKey);

    final Pair<VCFHeader, List<VariantContext>> expected =
            VariantContextTestUtils.readEntireVCFIntoMemory(new File(eColiExpectedOut).getAbsolutePath());
    final VCFInfoHeaderLine expectedFuncotatorLine = expected.getLeft().getInfoHeaderLine(funcotatorKey);

    Assert.assertEquals(actualFuncotatorLine, expectedFuncotatorLine);
    VariantContextTestUtils.assertEqualVariants(actual.getRight(), expected.getRight());
}
 
Example #11
Source File: SageHotspotAnnotation.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the output header for hotspot annotation.
 *
 * The result copies the meta-data lines and genotype samples of {@code template}, adds the three flag
 * INFO lines (hotspot, near hotspot, recovered) and then every INFO line of {@code hotspotVCF}.
 *
 * @param template   header providing the base meta-data and the samples
 * @param hotspotVCF header whose INFO lines are appended
 * @return the newly created output header
 */
@NotNull
private static VCFHeader generateOutputHeader(@NotNull final VCFHeader template, @NotNull final VCFHeader hotspotVCF) {
    final VCFHeader result = new VCFHeader(template.getMetaDataInInputOrder(), template.getGenotypeSamples());

    final VCFInfoHeaderLine hotspotLine =
            new VCFInfoHeaderLine(HOTSPOT_FLAG, 0, VCFHeaderLineType.Flag, HOTSPOT_DESCRIPTION);
    final VCFInfoHeaderLine nearHotspotLine =
            new VCFInfoHeaderLine(NEAR_HOTSPOT_FLAG, 0, VCFHeaderLineType.Flag, NEAR_HOTSPOT_DESCRIPTION);
    final VCFInfoHeaderLine recoveredLine =
            new VCFInfoHeaderLine(RECOVERED_FLAG, 0, VCFHeaderLineType.Flag, RECOVERED_FLAG_DESCRIPTION);
    result.addMetaDataLine(hotspotLine);
    result.addMetaDataLine(nearHotspotLine);
    result.addMetaDataLine(recoveredLine);

    // Carry over every INFO definition of the hotspot VCF.
    hotspotVCF.getInfoHeaderLines().forEach(result::addMetaDataLine);

    return result;
}
 
Example #12
Source File: FuncotationMetadataUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates placeholder metadata for fields about which nothing is known beyond their names.
 *
 * Each field is described as an unbounded-count String INFO line carrying {@code UNKNOWN_DESCRIPTION}.
 *
 * @param fieldNames Names of the fields to describe.  Never {@code null}
 * @return {@link FuncotationMetadata} containing one placeholder header line per field name.
 */
public static FuncotationMetadata createWithUnknownAttributes(final List<String> fieldNames) {
    Utils.nonNull(fieldNames);

    final List<VCFInfoHeaderLine> placeholderLines = fieldNames.stream()
            .map(fieldName -> new VCFInfoHeaderLine(fieldName, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, UNKNOWN_DESCRIPTION))
            .collect(Collectors.toList());

    return VcfFuncotationMetadata.create(placeholderLines);
}
 
Example #13
Source File: PonVCF.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Opens a VCF writer on {@code output} and immediately writes the PON header.
 *
 * The writer is built with on-the-fly indexing and asynchronous IO switched off and with genotype
 * writing suppressed. The header declares the three PON INFO fields and records the number of input samples.
 *
 * @param output     path of the VCF file to write
 * @param sampleSize number of input samples, stored in the "PonInputSampleCount" header line
 */
PonVCF(final String output, int sampleSize) {
    final VariantContextWriterBuilder writerBuilder = new VariantContextWriterBuilder()
            .setOutputFile(output)
            .modifyOption(Options.INDEX_ON_THE_FLY, false)
            .modifyOption(Options.USE_ASYNC_IO, false)
            .modifyOption(Options.DO_NOT_WRITE_GENOTYPES, true);
    writer = writerBuilder.build();

    final VCFInfoHeaderLine countLine =
            new VCFInfoHeaderLine(PON_COUNT, 1, VCFHeaderLineType.Integer, "how many samples had the variant");
    final VCFInfoHeaderLine totalLine =
            new VCFInfoHeaderLine(PON_TOTAL, 1, VCFHeaderLineType.Integer, "total depth");
    final VCFInfoHeaderLine maxLine =
            new VCFInfoHeaderLine(PON_MAX, 1, VCFHeaderLineType.Integer, "max depth");
    final VCFHeaderLine sampleCountLine = new VCFHeaderLine("PonInputSampleCount", String.valueOf(sampleSize));

    final VCFHeader ponHeader = new VCFHeader();
    ponHeader.addMetaDataLine(countLine);
    ponHeader.addMetaDataLine(totalLine);
    ponHeader.addMetaDataLine(maxLine);
    ponHeader.addMetaDataLine(sampleCountLine);
    writer.writeHeader(ponHeader);
}
 
Example #14
Source File: VcfToVariant.java    From genomewarp with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the INFO attributes of a variant context into a map of typed list values.
 *
 * Each attribute is converted with the type declared by its INFO header line. An attribute with no
 * matching header line is logged at WARNING level and left out of the result. A {@code List} attribute
 * contributes one typed value per element; any other attribute contributes a single typed value.
 *
 * @param vc     variant context whose attributes are converted
 * @param header header supplying the INFO line (and thus the type) of each attribute
 * @return map from attribute key to its converted values
 */
@VisibleForTesting
static Map<String, ListValue> getInfo(VariantContext vc, VCFHeader header) {
  Map<String, ListValue> infoByKey = new HashMap<>();

  for (Map.Entry<String, Object> attribute : vc.getAttributes().entrySet()) {
    String key = attribute.getKey();
    VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key);

    // Without a header line the value's type is unknown, so the attribute is skipped.
    if (headerLine == null) {
      logger.log(Level.WARNING, String.format("Could not find matching VCF header field, "
          + "skipping info field %s", key));
      continue;
    }

    VCFHeaderLineType declaredType = headerLine.getType();
    Object rawValue = attribute.getValue();
    ListValue.Builder values = ListValue.newBuilder();

    if (rawValue instanceof List) {
      for (Object element : (List<?>) rawValue) {
        values.addValues(createTypedValue(declaredType, element));
      }
    } else {
      values.addValues(createTypedValue(declaredType, rawValue));
    }
    infoByKey.put(key, values.build());
  }

  return infoByKey;
}
 
Example #15
Source File: SagePostProcessVCF.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Adds the two SnpEff summary INFO lines to {@code header} and writes it through the underlying writer.
 *
 * Note that {@code header} itself is modified.
 *
 * @param header header to extend and write
 */
public void writeHeader(@NotNull final VCFHeader header) {
    // Five values: Gene, Transcript, Effect, CodingEffect, GenesAffected.
    final VCFInfoHeaderLine worstLine = new VCFInfoHeaderLine(
            SNPEFF_WORST,
            5,
            VCFHeaderLineType.String,
            "SnpEff worst transcript summary [Gene, Transcript, Effect, CodingEffect, GenesAffected]");
    // Six values: Gene, Transcript, Effect, CodingEffect, HgvsCodingImpact, HgvsProteinImpact.
    final VCFInfoHeaderLine canonicalLine = new VCFInfoHeaderLine(
            SNPEFF_CANONICAL,
            6,
            VCFHeaderLineType.String,
            "SnpEff canonical transcript summary [Gene, Transcript, Effect, CodingEffect, HgvsCodingImpact, HgvsProteinImpact]");

    header.addMetaDataLine(worstLine);
    header.addMetaDataLine(canonicalLine);
    writer.writeHeader(header);
}
 
Example #16
Source File: FindMendelianViolations.java    From picard with MIT License 5 votes vote down vote up
/**
 * Writes one VCF of mendelian violations per trio into {@code VCF_DIR}; does nothing when {@code VCF_DIR}
 * is {@code null}.
 *
 * Each output header consists of the input header lines plus the INFO lines describing the violation type
 * and the original AC/AF/AN values, with the trio's mother, father and individual as samples. The
 * violations of a family are ordered with a comparator built from the input header's contig lines.
 *
 * @param result detector result holding the violations, looked up by family id
 */
private void writeAllViolations(final MendelianViolationDetector.Result result) {
    if (VCF_DIR == null) {
        return;
    }

    LOG.info(String.format("Writing family violation VCFs to %s/", VCF_DIR.getAbsolutePath()));

    final VariantContextComparator vcComparator = new VariantContextComparator(inputHeader.get().getContigLines());
    final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputHeader.get().getMetaDataInInputOrder());

    headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.MENDELIAN_VIOLATION_KEY, 1, VCFHeaderLineType.String, "Type of mendelian violation."));
    headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AC, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Original AC"));
    headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AF, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Original AF"));
    headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AN, 1, VCFHeaderLineType.Integer, "Original AN"));

    for (final PedFile.PedTrio trio : pedFile.get().values()) {
        final File outputFile = new File(VCF_DIR, IOUtil.makeFileNameSafe(trio.getFamilyId() + IOUtil.VCF_FILE_EXTENSION));
        LOG.info(String.format("Writing %s violation VCF to %s", trio.getFamilyId(), outputFile.getAbsolutePath()));

        // try-with-resources guarantees the writer is closed even if building the header, sorting the
        // violations or writing a record throws; previously such a failure leaked the open writer.
        try (final VariantContextWriter out = new VariantContextWriterBuilder()
                .setOutputFile(outputFile)
                .unsetOption(INDEX_ON_THE_FLY)
                .build()) {

            final VCFHeader newHeader = new VCFHeader(headerLines, CollectionUtil.makeList(trio.getMaternalId(), trio.getPaternalId(), trio.getIndividualId()));
            final TreeSet<VariantContext> orderedViolations = new TreeSet<>(vcComparator);

            orderedViolations.addAll(result.violations().get(trio.getFamilyId()));
            out.writeHeader(newHeader);
            orderedViolations.forEach(out::add);
        }
    }
}
 
Example #17
Source File: ReducibleAnnotation.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Returns the VCF INFO header lines describing this annotation's raw keys.
 *
 * Each key from {@code getRawKeyNames()} is looked up through {@code GATKVCFHeaderLines.getInfoLine}.
 *
 * @return A list of VCFInfoHeaderLines, one per raw key added by this annotation, in raw-key order
 */
default List<VCFInfoHeaderLine> getRawDescriptions() {
    final List<VCFInfoHeaderLine> rawInfoLines = new ArrayList<>(1);
    for (final String rawKeyName : getRawKeyNames()) {
        final VCFInfoHeaderLine rawInfoLine = GATKVCFHeaderLines.getInfoLine(rawKeyName);
        rawInfoLines.add(rawInfoLine);
    }
    return rawInfoLines;
}
 
Example #18
Source File: InfoFieldAnnotation.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Returns the VCF INFO header lines describing this annotation's keys.
 *
 * Each key from {@code getKeyNames()} is looked up through {@code GATKVCFHeaderLines.getInfoLine}.
 * Subclasses must ensure that this list is not null and does not contain null.
 *
 * @return one header line per key, in key order
 */
public List<VCFInfoHeaderLine> getDescriptions() {
    final List<VCFInfoHeaderLine> infoLines = new ArrayList<>(getKeyNames().size());
    getKeyNames().forEach(keyName -> infoLines.add(GATKVCFHeaderLines.getInfoLine(keyName)));
    return infoLines;
}
 
Example #19
Source File: GATKRegistrator.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Registers with Kryo the classes needed to support
 * {@link org.broadinstitute.hellbender.tools.funcotator.FuncotationMap#create(FuncotationMap)}, giving each
 * one an instantiator that produces a throw-away placeholder instance.
 *
 * Registered classes: {@code TableFuncotation}, {@code VcfFuncotationMetadata}, {@code VCFInfoHeaderLine}
 * and {@code Allele}.
 *
 * @param kryo Kryo instance to update in-place.  Never {@code null}
 */
@VisibleForTesting
public static void registerFuncotationMapDependencies(final Kryo kryo) {
    Utils.nonNull(kryo);

    final ObjectInstantiator<TableFuncotation> tableFuncotationInstantiator =
            () -> TableFuncotation.create(new LinkedHashMap<>(), Allele.UNSPECIFIED_ALTERNATE_ALLELE, "TEMP", null);
    kryo.register(TableFuncotation.class).setInstantiator(tableFuncotationInstantiator);

    final ObjectInstantiator<VcfFuncotationMetadata> metadataInstantiator =
            () -> VcfFuncotationMetadata.create(new ArrayList<>());
    kryo.register(VcfFuncotationMetadata.class).setInstantiator(metadataInstantiator);

    final ObjectInstantiator<VCFInfoHeaderLine> infoHeaderLineInstantiator =
            () -> new VCFInfoHeaderLine("TMP", 2, VCFHeaderLineType.String, "");
    kryo.register(VCFInfoHeaderLine.class).setInstantiator(infoHeaderLineInstantiator);

    final ObjectInstantiator<Allele> alleleInstantiator = () -> Allele.create("TCGA");
    kryo.register(Allele.class).setInstantiator(alleleInstantiator);
}
 
Example #20
Source File: GenotypingEngine.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Returns the INFO header lines this engine contributes to the output header.
 *
 * The set holds the "number of discovered alleles" line when the corresponding genotyping argument is
 * enabled, and is empty otherwise.
 *
 * @return a new, possibly empty, set of INFO header lines
 */
public Set<VCFInfoHeaderLine> getAppropriateVCFInfoHeaders() {
    final Set<VCFInfoHeaderLine> infoHeaders = new LinkedHashSet<>();

    final boolean annotateDiscoveredAlleles = configuration.genotypeArgs.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED;
    if (annotateDiscoveredAlleles) {
        final VCFInfoHeaderLine discoveredAllelesLine =
                GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.NUMBER_OF_DISCOVERED_ALLELES_KEY);
        infoHeaders.add(discoveredAllelesLine);
    }

    return infoHeaders;
}
 
Example #21
Source File: FuncotateSegments.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates the metadata describing the five fixed segment fields: segment mean, probe count, segment call,
 * sample name and genome build.
 *
 * @return metadata with one single-valued header line per field, in the order listed above
 */
private FuncotationMetadata createMetadata() {
    final VCFInfoHeaderLine segmentMean = new VCFInfoHeaderLine(
            "Segment_Mean", 1, VCFHeaderLineType.Float, "Mean for the segment.  Units will be the same as the input file.");
    final VCFInfoHeaderLine numProbes = new VCFInfoHeaderLine(
            "Num_Probes", 1, VCFHeaderLineType.Integer, "Number of probes/targets/bins overlapping the segment.");
    final VCFInfoHeaderLine segmentCall = new VCFInfoHeaderLine(
            "Segment_Call", 1, VCFHeaderLineType.String, "Segment call (whether the segment is amplified, deleted, etc).");
    final VCFInfoHeaderLine sample = new VCFInfoHeaderLine(
            "Sample", 1, VCFHeaderLineType.String, "Sample name for the segment.");
    final VCFInfoHeaderLine build = new VCFInfoHeaderLine(
            "build", 1, VCFHeaderLineType.String, "Genome build (e.g. 'hg19' or 'hg38').");

    return VcfFuncotationMetadata.create(Arrays.asList(segmentMean, numProbes, segmentCall, sample, build));
}
 
Example #22
Source File: VcfFuncotationMetadata.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates metadata from a list of VCF INFO header lines, keyed by header-line ID.
 *
 * The lines are stored in a {@code LinkedHashMap}, so the order of first appearance is kept. When two
 * lines share an ID, the later line replaces the earlier one.
 *
 * @param vcfInfoHeaderLines Header lines to index.  Never {@code null}
 * @return Metadata corresponding to VCF info fields.  Never {@code null}
 */
public static VcfFuncotationMetadata create(final List<VCFInfoHeaderLine> vcfInfoHeaderLines) {
    Utils.nonNull(vcfInfoHeaderLines);

    final LinkedHashMap<String, VCFInfoHeaderLine> linesById = vcfInfoHeaderLines.stream()
            .collect(Collectors.toMap(
                    VCFInfoHeaderLine::getID,
                    Function.identity(),
                    (earlier, later) -> later,   // last line wins on duplicate IDs
                    LinkedHashMap::new));

    return new VcfFuncotationMetadata(linesById);
}
 
Example #23
Source File: VariantHotspotEnrichment.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Adds the hotspot and near-hotspot flag INFO lines to the given header.
 *
 * The header is modified in place and the same instance is returned.
 *
 * @param template header to extend
 * @return {@code template}, after the two lines have been added
 */
@NotNull
@Override
public VCFHeader enrichHeader(@NotNull final VCFHeader template) {
    final VCFInfoHeaderLine hotspotLine =
            new VCFInfoHeaderLine(HOTSPOT_FLAG, 0, VCFHeaderLineType.Flag, HOTSPOT_DESCRIPTION);
    final VCFInfoHeaderLine nearHotspotLine =
            new VCFInfoHeaderLine(NEAR_HOTSPOT_FLAG, 0, VCFHeaderLineType.Flag, NEAR_HOTSPOT_DESCRIPTION);

    template.addMetaDataLine(hotspotLine);
    template.addMetaDataLine(nearHotspotLine);
    return template;
}
 
Example #24
Source File: CustomMafFuncotationCreator.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates the metadata for the custom MAF count fields.
 *
 * The first five entries of {@code COUNT_FIELD_NAMES} are described, in order, as: tumor alt count, tumor
 * ref count, normal alt count, normal ref count (all Integer) and tumor alt allele fraction (Float). Every
 * field has one value per alternate allele.
 *
 * @return metadata holding the five header lines
 */
private static FuncotationMetadata createCustomMafCountFieldsMetadata() {
    final VCFInfoHeaderLine tumorAltCount = new VCFInfoHeaderLine(
            COUNT_FIELD_NAMES.get(0), VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Number of alternate reads in the tumor.");
    final VCFInfoHeaderLine tumorRefCount = new VCFInfoHeaderLine(
            COUNT_FIELD_NAMES.get(1), VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Number of reference reads in the tumor.");
    final VCFInfoHeaderLine normalAltCount = new VCFInfoHeaderLine(
            COUNT_FIELD_NAMES.get(2), VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Number of alternate reads in the normal.");
    final VCFInfoHeaderLine normalRefCount = new VCFInfoHeaderLine(
            COUNT_FIELD_NAMES.get(3), VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Number of reference reads in the normal.");
    final VCFInfoHeaderLine tumorAltFraction = new VCFInfoHeaderLine(
            COUNT_FIELD_NAMES.get(4), VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele fractions of alternate alleles in the tumor.");

    return VcfFuncotationMetadata.create(
            Arrays.asList(tumorAltCount, tumorRefCount, normalAltCount, normalRefCount, tumorAltFraction));
}
 
Example #25
Source File: GencodeFuncotationFactory.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Return the metadata for segment annotations.
 *
 * Every field name is built as {@code getName() + "_" + getVersion()} followed by a field-specific suffix.
 * All fields are single-valued strings.
 *
 * @return Never {@code null}
 */
private FuncotationMetadata createSegmentFuncotationMetadata() {
    // Shared "<name>_<version>" prefix of every segment field produced by this data source.
    final String fieldPrefix = getName() + "_" + getVersion();

    return VcfFuncotationMetadata.create(
            Arrays.asList(
                    new VCFInfoHeaderLine(fieldPrefix + GENES_SUFFIX, 1, VCFHeaderLineType.String, "The genes overlapping the segment.  Blank if none."),
                    new VCFInfoHeaderLine(fieldPrefix + START_GENE_SUFFIX, 1, VCFHeaderLineType.String, "The genes overlapping the start of the segment.  Blank if none."),
                    new VCFInfoHeaderLine(fieldPrefix + END_GENE_SUFFIX, 1, VCFHeaderLineType.String, "The genes overlapping the end of the segment.  Blank if none."),
                    // The exon fields previously carried the gene descriptions (copy-paste slip).
                    // NOTE(review): wording inferred from the *_EXON_SUFFIX constant names - confirm.
                    new VCFInfoHeaderLine(fieldPrefix + START_EXON_SUFFIX, 1, VCFHeaderLineType.String, "The exon overlapping the start of the segment.  Blank if none."),
                    new VCFInfoHeaderLine(fieldPrefix + END_EXON_SUFFIX, 1, VCFHeaderLineType.String, "The exon overlapping the end of the segment.  Blank if none."),
                    new VCFInfoHeaderLine(fieldPrefix + "_alt_allele", 1, VCFHeaderLineType.String, "Always blank.  Included for legacy reasons."),
                    new VCFInfoHeaderLine(fieldPrefix + "_ref_allele", 1, VCFHeaderLineType.String, "Always blank.  Included for legacy reasons.")
            )
    );
}
 
Example #26
Source File: SimpleTsvOutputRendererUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates fixed dummy segment metadata for tests: five single-valued String fields, including one whose
 * name contains special characters.
 *
 * @return metadata holding the five dummy header lines
 */
private static FuncotationMetadata createDummySegmentFuncotationMetadata() {
    final VCFInfoHeaderLine genes = new VCFInfoHeaderLine(
            "Gencode_19_genes", 1, VCFHeaderLineType.String, "The genes overlapping the segment.");
    final VCFInfoHeaderLine foo1 = new VCFInfoHeaderLine(
            "foo1", 1, VCFHeaderLineType.String, "foo1");
    final VCFInfoHeaderLine foobar2 = new VCFInfoHeaderLine(
            "foobar2", 1, VCFHeaderLineType.String, "foobar2 (an alias relative to the config file)");
    final VCFInfoHeaderLine test3 = new VCFInfoHeaderLine(
            "TEST3", 1, VCFHeaderLineType.String, "Note that this has no spaces");
    final VCFInfoHeaderLine specialCharacters = new VCFInfoHeaderLine(
            "foo3!!", 1, VCFHeaderLineType.String, "special character....");

    return VcfFuncotationMetadata.create(Arrays.asList(genes, foo1, foobar2, test3, specialCharacters));
}
 
Example #27
Source File: SimpleTsvOutputRendererUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates a funcotation map without transcript information that holds a single table funcotation:
 * field {@code FUNCOTATION_FIELD_1} with value "value1", on the copy-neutral allele, from data source "TEST".
 *
 * @return the single-entry funcotation map
 */
private FuncotationMap createSimpleFuncotationMap() {
    final VCFInfoHeaderLine fieldHeaderLine =
            new VCFInfoHeaderLine(FUNCOTATION_FIELD_1, 1, VCFHeaderLineType.String, "Unknown");
    final VcfFuncotationMetadata fieldMetadata =
            VcfFuncotationMetadata.create(Collections.singletonList(fieldHeaderLine));

    final TableFuncotation onlyFuncotation = TableFuncotation.create(
            Collections.singletonList(FUNCOTATION_FIELD_1),
            Collections.singletonList("value1"),
            AnnotatedIntervalToSegmentVariantContextConverter.COPY_NEUTRAL_ALLELE,
            "TEST",
            fieldMetadata);

    return FuncotationMap.createNoTranscriptInfo(Collections.singletonList(onlyFuncotation));
}
 
Example #28
Source File: FuncotatorIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Runs Funcotator with VCF output on the ClinVar multi-hit test input and checks that exactly one variant
 * is written, then validates the funcotations against the field names taken from the output header.
 */
@Test
public void testXsvLocatableAnnotationsHaveOnlyOneEntryForMultiHitLocations() {
    final int EXPECTED_NUM_VARIANTS = 1;

    final FuncotatorArgumentDefinitions.OutputFormatType outputFormatType = FuncotatorArgumentDefinitions.OutputFormatType.VCF;
    final File outputFile = getOutputFile(outputFormatType);

    final ArgumentsBuilder arguments = createBaselineArgumentsForFuncotator(
            XSV_CLINVAR_MULTIHIT_TEST_VCF, outputFile, b37Chr2Ref, DS_XSV_CLINVAR_TESTS,
            FuncotatorTestConstants.REFERENCE_VERSION_HG19, outputFormatType, false);
    // Required because the test runs on a subset of b37.
    arguments.add(FuncotatorArgumentDefinitions.FORCE_B37_TO_HG19_REFERENCE_CONTIG_CONVERSION, true);

    runCommandLine(arguments);

    final Pair<VCFHeader, List<VariantContext>> vcfInfo = VariantContextTestUtils.readEntireVCFIntoMemory(outputFile.getAbsolutePath());
    final VCFHeader outputHeader = vcfInfo.getLeft();
    final List<VariantContext> outputVariants = vcfInfo.getRight();

    // The funcotation field names are encoded in the description of the Funcotator header line.
    final VCFInfoHeaderLine funcotationHeaderLine = outputHeader.getInfoHeaderLine(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME);
    final String[] funcotationFieldNames = FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(funcotationHeaderLine.getDescription());

    Assert.assertEquals(outputVariants.size(), EXPECTED_NUM_VARIANTS, "Found more than " + EXPECTED_NUM_VARIANTS + " variants!");

    validateFuncotationsOnVcf(outputVariants, funcotationFieldNames);
}
 
Example #29
Source File: FuncotatorIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Runs Funcotator with VCF output on {@code XSV_CLINVAR_COL_TEST_VCF} using the
 * {@code DS_XSV_CLINVAR_TESTS} data sources, expects ten variants in the result, and passes
 * them to {@code validateFuncotationsOnVcf} along with the funcotation field names read from
 * the output header.
 */
@Test
public void testXsvLocatableAnnotationsHaveCorrectColsForOnlyOnePositionSpecified() {
    final FuncotatorArgumentDefinitions.OutputFormatType vcfFormat = FuncotatorArgumentDefinitions.OutputFormatType.VCF;
    final File funcotatedVcf = getOutputFile(vcfFormat);

    // Baseline Funcotator invocation, plus the b37 -> hg19 contig conversion switch.
    final ArgumentsBuilder args = createBaselineArgumentsForFuncotator(
            XSV_CLINVAR_COL_TEST_VCF, funcotatedVcf, b37Chr2Ref, DS_XSV_CLINVAR_TESTS,
            FuncotatorTestConstants.REFERENCE_VERSION_HG19, vcfFormat, false);
    args.add(FuncotatorArgumentDefinitions.FORCE_B37_TO_HG19_REFERENCE_CONTIG_CONVERSION, true);

    runCommandLine(args);

    // Load the whole output: header on the left, variants on the right.
    final Pair<VCFHeader, List<VariantContext>> headerAndVariants =
            VariantContextTestUtils.readEntireVCFIntoMemory(funcotatedVcf.getAbsolutePath());

    // The funcotation field names are encoded in the description of the Funcotator INFO header line.
    final VCFInfoHeaderLine funcotatorInfoLine =
            headerAndVariants.getLeft().getInfoHeaderLine(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME);
    final String[] fieldNames =
            FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(funcotatorInfoLine.getDescription());

    final List<VariantContext> variants = headerAndVariants.getRight();
    Assert.assertEquals(variants.size(), 10);

    validateFuncotationsOnVcf(variants, fieldNames);
}
 
Example #30
Source File: VcfFuncotationFactoryUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that the metadata carried by funcotations created from a VCF data file agrees with the
 * funcotation field names, the factory's supported fields, and the INFO header lines the
 * factory derives from the data file's header.
 *
 * @param variantFeatureDataFileName path of the VCF file used both as the factory's data source and as the feature source
 * @param variant                    variant for which funcotations are created
 * @param referenceContext           reference context handed to the factory
 * @param expected                   unused here; present only so the shared data provider can be reused
 */
@Test(dataProvider = "provideForTestCreateFuncotationsOnVariant")
public void testCreateFuncotationMetadata(final String variantFeatureDataFileName,
                                          final VariantContext variant,
                                          final ReferenceContext referenceContext,
                                          final List<Funcotation> expected) {
    final VcfFuncotationFactory factory =
            createVcfFuncotationFactory(FACTORY_NAME, FACTORY_VERSION, IOUtils.getPath(variantFeatureDataFileName));

    // Query the data file for the features spanning the variant's interval.
    final List<Feature> overlappingFeatures;
    try (final VCFFileReader reader = new VCFFileReader(IOUtils.getPath(variantFeatureDataFileName))) {
        overlappingFeatures = reader.query(variant.getContig(), variant.getStart(), variant.getEnd())
                .stream()
                .collect(Collectors.toList());
    }

    final List<Funcotation> funcotations =
            factory.createFuncotationsOnVariant(variant, referenceContext, overlappingFeatures, Collections.emptyList());

    // All funcotations must share one and the same set of header info.
    final int distinctHeaderInfoCount = funcotations.stream()
            .map(funcotation -> funcotation.getMetadata().retrieveAllHeaderInfo())
            .collect(Collectors.toSet())
            .size();
    Assert.assertEquals(distinctHeaderInfoCount, 1);

    // INFO header lines the factory builds from the header of the data file itself.
    final Pair<VCFHeader, List<VariantContext>> vcfContents =
            VariantContextTestUtils.readEntireVCFIntoMemory(variantFeatureDataFileName);
    final List<VCFInfoHeaderLine> factoryHeaderLines =
            factory.createFuncotationVcfInfoHeaderLines(vcfContents.getLeft());

    // Compare the IDs found in the first funcotation's metadata against its field names and the factory's view.
    final Funcotation firstFuncotation = funcotations.get(0);
    final Set<String> fieldNames = firstFuncotation.getFieldNames();
    final Set<String> metadataIds = firstFuncotation.getMetadata().retrieveAllHeaderInfo().stream()
            .map(headerLine -> headerLine.getID())
            .collect(Collectors.toSet());

    Assert.assertEquals(metadataIds, fieldNames);
    Assert.assertEquals(metadataIds, factory.getSupportedFuncotationFields());
    Assert.assertEquals(firstFuncotation.getMetadata().retrieveAllHeaderInfo(), factoryHeaderLines);
}