Java Code Examples for htsjdk.variant.vcf.VCFHeader#getMetaDataInInputOrder()

The following examples show how to use htsjdk.variant.vcf.VCFHeader#getMetaDataInInputOrder(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: StrelkaPostProcessApplication.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Creates the header of the post-processed output VCF.
 *
 * <p>The metadata lines of {@code header} are copied in input order and the genotype samples are
 * replaced by the single {@code sampleName}. The GT and AD format lines, a
 * {@code StrelkaGATKCompatibility} line and the MAPPABILITY, SOMATIC_PON_COUNT and GERMLINE_PON_COUNT
 * info lines are then added, in that order.
 *
 * @param header     header whose metadata lines are copied
 * @param sampleName the only genotype sample of the returned header
 * @return the newly built header
 */
@NotNull
public static VCFHeader generateOutputHeader(@NotNull final VCFHeader header, @NotNull final String sampleName) {
    final VCFHeader result = new VCFHeader(header.getMetaDataInInputOrder(), Sets.newHashSet(sampleName));

    // Standard FORMAT lines, registered in the order GT, AD.
    for (final String formatKey : new String[] { "GT", "AD" }) {
        result.addMetaDataLine(VCFStandardHeaderLines.getFormatLine(formatKey));
    }

    final VCFHeaderLine compatibilityLine =
            new VCFHeaderLine("StrelkaGATKCompatibility", "Added GT fields to strelka calls for gatk compatibility.");
    result.addMetaDataLine(compatibilityLine);

    final VCFInfoHeaderLine mappabilityLine =
            new VCFInfoHeaderLine("MAPPABILITY", 1, VCFHeaderLineType.Float, "Mappability (percentage)");
    result.addMetaDataLine(mappabilityLine);

    final VCFInfoHeaderLine somaticPonLine = new VCFInfoHeaderLine("SOMATIC_PON_COUNT", 1, VCFHeaderLineType.Integer,
            "Number of times the variant appears in the somatic PON");
    result.addMetaDataLine(somaticPonLine);

    final VCFInfoHeaderLine germlinePonLine = new VCFInfoHeaderLine("GERMLINE_PON_COUNT", 1, VCFHeaderLineType.Integer,
            "Number of times the variant appears in the germline PON");
    result.addMetaDataLine(germlinePonLine);

    return result;
}
 
Example 2
Source File: StructuralVariantHeader.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates an output header from {@code template} and adds the PURPLE structural variant lines.
 *
 * <p>The template's metadata lines (in input order) and genotype samples are copied first; the
 * additional lines listed in the body are then appended in the order shown.
 *
 * @param purpleVersion value stored in the {@code purpleVersion} header line
 * @param template      header whose metadata lines and genotype samples are copied
 * @return the newly built header
 */
@NotNull
public static VCFHeader generateHeader(@NotNull final String purpleVersion, @NotNull final VCFHeader template) {
    final VCFHeader result = new VCFHeader(template.getMetaDataInInputOrder(), template.getGenotypeSamples());

    // Declared in exactly the order in which they are added to the header.
    final VCFHeaderLine[] additions = {
            new VCFHeaderLine("purpleVersion", purpleVersion),
            VCFStandardHeaderLines.getFormatLine("GT"),
            new VCFInfoHeaderLine(StructuralVariantFactory.RECOVERED, 0, VCFHeaderLineType.Flag, RECOVERED_DESC),
            new VCFInfoHeaderLine(StructuralVariantFactory.INFERRED, 0, VCFHeaderLineType.Flag, INFERRED_DESC),
            new VCFFilterHeaderLine(INFERRED, INFERRED_DESC),
            new VCFInfoHeaderLine(StructuralVariantFactory.IMPRECISE, 0, VCFHeaderLineType.Flag, IMPRECISE_DESC),
            new VCFInfoHeaderLine(CIPOS, 2, VCFHeaderLineType.Integer, CIPOS_DESC),
            new VCFInfoHeaderLine(SVTYPE, 1, VCFHeaderLineType.String, SVTYPE_DESC),
            new VCFInfoHeaderLine(PURPLE_AF_INFO, UNBOUNDED, VCFHeaderLineType.Float, PURPLE_AF_DESC),
            new VCFInfoHeaderLine(PURPLE_CN_INFO, UNBOUNDED, VCFHeaderLineType.Float, PURPLE_CN_DESC),
            new VCFInfoHeaderLine(RECOVERY_METHOD, 1, VCFHeaderLineType.String, RECOVERY_METHOD_DESC),
            new VCFInfoHeaderLine(RECOVERY_FILTER, UNBOUNDED, VCFHeaderLineType.String, RECOVERY_FILTER_DESC),
            new VCFInfoHeaderLine(PURPLE_JUNCTION_COPY_NUMBER_INFO, 1, VCFHeaderLineType.Float,
                    PURPLE_JUNCTION_COPY_NUMBER_DESC),
            new VCFInfoHeaderLine(PURPLE_CN_CHANGE_INFO, UNBOUNDED, VCFHeaderLineType.Float, PURPLE_CN_CHANGE_DESC)
    };

    for (final VCFHeaderLine addition : additions) {
        result.addMetaDataLine(addition);
    }
    return result;
}
 
Example 3
Source File: SageHotspotAnnotation.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates the output header for hotspot annotation.
 *
 * <p>Starts from the template's metadata lines (in input order) and genotype samples, adds the
 * hotspot, near-hotspot and recovered flag info lines, and finally adds every info header line
 * of {@code hotspotVCF}.
 *
 * @param template   header whose metadata lines and genotype samples are copied
 * @param hotspotVCF header whose info lines are added to the result
 * @return the newly built header
 */
@NotNull
private static VCFHeader generateOutputHeader(@NotNull final VCFHeader template, @NotNull final VCFHeader hotspotVCF) {
    final VCFHeader result = new VCFHeader(template.getMetaDataInInputOrder(), template.getGenotypeSamples());

    // Flags written by the annotation itself.
    result.addMetaDataLine(new VCFInfoHeaderLine(HOTSPOT_FLAG, 0, VCFHeaderLineType.Flag, HOTSPOT_DESCRIPTION));
    result.addMetaDataLine(new VCFInfoHeaderLine(NEAR_HOTSPOT_FLAG, 0, VCFHeaderLineType.Flag, NEAR_HOTSPOT_DESCRIPTION));
    result.addMetaDataLine(new VCFInfoHeaderLine(RECOVERED_FLAG, 0, VCFHeaderLineType.Flag, RECOVERED_FLAG_DESCRIPTION));

    // Info lines taken over from the hotspot VCF.
    hotspotVCF.getInfoHeaderLines().forEach(result::addMetaDataLine);

    return result;
}
 
Example 4
Source File: RenameSampleInVcf.java    From picard with MIT License 5 votes vote down vote up
/**
 * Copies the VCF {@code INPUT} to {@code OUTPUT}, replacing the sample name in the header with
 * {@code NEW_SAMPLE_NAME}. Variant records are written unchanged.
 *
 * <p>The reader and the writer are managed by try-with-resources so that both are closed even when
 * validation or copying fails (writer first, then reader).
 *
 * @return 0 on success
 * @throws IllegalArgumentException if the input has more than one sample, or if
 *         {@code OLD_SAMPLE_NAME} is set and the input has no sample or a different sample
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    try (final VCFFileReader in = new VCFFileReader(INPUT, false)) {
        final VCFHeader header = in.getFileHeader();
        final int sampleCount = header.getGenotypeSamples().size();

        if (sampleCount > 1) {
            throw new IllegalArgumentException("Input VCF must be single-sample.");
        }

        if (OLD_SAMPLE_NAME != null) {
            // Guard against a sample-less input: get(0) would otherwise fail with an
            // IndexOutOfBoundsException instead of a meaningful error.
            if (sampleCount == 0) {
                throw new IllegalArgumentException("Input VCF did not contain expected sample. Contained no samples.");
            }
            if (!OLD_SAMPLE_NAME.equals(header.getGenotypeSamples().get(0))) {
                throw new IllegalArgumentException("Input VCF did not contain expected sample. Contained: " + header.getGenotypeSamples().get(0));
            }
        }

        final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
        if (CREATE_INDEX) {
            options.add(Options.INDEX_ON_THE_FLY);
        } else {
            options.remove(Options.INDEX_ON_THE_FLY);
        }

        // Same metadata as the input, but with the new sample name as the only sample.
        final VCFHeader outHeader = new VCFHeader(header.getMetaDataInInputOrder(), CollectionUtil.makeList(NEW_SAMPLE_NAME));

        try (final VariantContextWriter out = new VariantContextWriterBuilder()
                .setOptions(options)
                .setOutputFile(OUTPUT)
                .setReferenceDictionary(outHeader.getSequenceDictionary())
                .build()) {
            out.writeHeader(outHeader);

            for (final VariantContext ctx : in) {
                out.add(ctx);
            }
        }
    }

    return 0;
}
 
Example 5
Source File: LazyVCFGenotypesContext.java    From Hadoop-BAM with MIT License 5 votes vote down vote up
/**
 * Passes the header to the codec together with the VCF version found in it.
 *
 * <p>The version is taken from the first metadata line (in input order) whose key is a format
 * string according to {@code VCFHeaderVersion.isFormatString}; it is {@code null} when there is
 * no such line.
 *
 * @param header the VCF header to hand to the codec
 */
@Override public void setHeader(VCFHeader header) {
	// Normally AbstractVCFCodec parses the header and thereby sets the
	// version field. It gets used later on so we need to set it.
	final VCFHeaderVersion version = header.getMetaDataInInputOrder().stream()
		.filter(line -> VCFHeaderVersion.isFormatString(line.getKey()))
		.findFirst()
		.map(line -> VCFHeaderVersion.toHeaderVersion(line.getValue()))
		.orElse(null);

	codec.setHeaderAndVersion(header, version);
}
 
Example 6
Source File: UpdateVCFSequenceDictionary.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Prepares the output header and writer before traversal.
 *
 * <p>The output header copies the input header's metadata lines and genotype samples when an input
 * header is available (otherwise it starts empty), receives the default tool header lines, and
 * gets its sequence dictionary set to the best available one before being written.
 *
 * @throws CommandLineException.BadArgumentValue if {@code replace} is not set and the input header
 *         already carries a non-empty sequence dictionary
 */
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();
    final boolean hasInputHeader = inputHeader != null;

    final VCFHeader outputHeader;
    if (hasInputHeader) {
        outputHeader = new VCFHeader(inputHeader.getMetaDataInInputOrder(), inputHeader.getGenotypeSamples());
    } else {
        outputHeader = new VCFHeader();
    }
    getDefaultToolVCFHeaderLines().forEach(outputHeader::addMetaDataLine);
    sourceDictionary = getBestAvailableSequenceDictionary();

    // If -replace is set, do not need to check the sequence dictionary for validity here -- it will still be
    // checked in our normal sequence dictionary validation. Warn and require opt-in via -replace if we're about to
    // clobber a valid sequence dictionary. Check the input file directly via the header rather than using the
    // engine, since it might dig one up from an index.
    if (!replace && hasInputHeader) {
        final SAMSequenceDictionary existingDictionary = inputHeader.getSequenceDictionary();
        final boolean wouldClobber = existingDictionary != null && !existingDictionary.getSequences().isEmpty();
        if (wouldClobber) {
            final String message = String.format(
                    "The input variant file %s already contains a sequence dictionary. " +
                            "Use %s to force the dictionary to be replaced.",
                    getDrivingVariantsFeatureInput().getName(),
                    REPLACE_ARGUMENT_NAME);
            throw new CommandLineException.BadArgumentValue(message);
        }
    }

    outputHeader.setSequenceDictionary(sourceDictionary);
    vcfWriter = createVCFWriter(new File(outFile));
    vcfWriter.writeHeader(outputHeader);
}
 
Example 7
Source File: CombineGVCFs.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates the output VCF writer and writes its header.
 *
 * <p>The header lines are the input header's metadata lines plus the default tool lines, the
 * annotation engine's descriptions, the DP info line, the dbSNP info line when a dbSNP resource is
 * configured, and (for somatic input) the genotype filter format line and, unless dropped, the
 * FORMAT equivalents of the standard Mutect info fields used for filtering.
 *
 * @return the writer, with the header already written
 */
private VariantContextWriter getVCFWriter() {
    final SortedSet<String> sampleNames = getSamplesForVariants();

    final VCFHeader rebuiltInputHeader = new VCFHeader(getHeaderForVariants().getMetaDataInInputOrder(), sampleNames);

    final Set<VCFHeaderLine> outputLines = new LinkedHashSet<>(rebuiltInputHeader.getMetaDataInInputOrder());
    outputLines.addAll(getDefaultToolVCFHeaderLines());
    outputLines.addAll(annotationEngine.getVCFAnnotationDescriptions());

    // add headers for annotations added by this tool
    outputLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));   // needed for gVCFs without DP tags
    if (dbsnp.dbsnp != null) {
        VCFStandardHeaderLines.addStandardInfoLines(outputLines, true, VCFConstants.DBSNP_KEY);
    }

    if (somaticInput) {
        // single-sample M2 variant filter status will get moved to genotype filter
        outputLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));
    }
    if (somaticInput && !dropSomaticFilteringAnnotations) {
        // standard M2 INFO annotations for filtering will get moved to FORMAT field
        for (final String infoKey : Mutect2FilteringEngine.STANDARD_MUTECT_INFO_FIELDS_FOR_FILTERING) {
            outputLines.add(GATKVCFHeaderLines.getEquivalentFormatHeaderLine(infoKey));
        }
    }

    final VariantContextWriter vcfOutput = createVCFWriter(outputFile);

    final Set<String> indexedSampleNames = new IndexedSampleList(sampleNames).asSetOfSamples();
    final VCFHeader outputHeader = new VCFHeader(outputLines, new TreeSet<>(indexedSampleNames));
    vcfOutput.writeHeader(outputHeader);

    return vcfOutput;
}
 
Example 8
Source File: FixCallSetSampleOrdering.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Validates the arguments, builds the header with the corrected sample ordering and writes it.
 *
 * <p>Fails with {@code SampleNameFixingCannotProceedException} when the batch size is 0, when the
 * presence of {@code gvcfToHeaderSampleMapFile} does not match the reader thread count, or when
 * the number of samples does not exceed the batch size.
 */
@Override
public void onTraversalStart() {
    assertThatTheyReallyWantToProceed();

    if (batchSize == 0) {
        throw new SampleNameFixingCannotProceedException("your callset is not affected by the bug if you ran with --"+ GenomicsDBImport.BATCHSIZE_ARG_LONG_NAME +" 0");
    }

    // The header sample map file is required with multiple reader threads and forbidden otherwise.
    final boolean multipleReaderThreads = readerThreads > 1;
    final boolean mapFileProvided = gvcfToHeaderSampleMapFile != null;
    if (multipleReaderThreads && !mapFileProvided) {
        throw new SampleNameFixingCannotProceedException("You must provide a --gvcfToHeaderSampleMapFile if GenomicsDBImport was run with --" + GenomicsDBImport.VCF_INITIALIZER_THREADS_LONG_NAME + " > 1");
    }
    if (!multipleReaderThreads && mapFileProvided) {
        throw new SampleNameFixingCannotProceedException("You must NOT provide a --gvcfToHeaderSampleMapFile if GenomicsDBImport was run with --" + GenomicsDBImport.VCF_INITIALIZER_THREADS_LONG_NAME + " 1");
    }

    final VCFHeader originalHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> newHeaderLines = new LinkedHashSet<>(originalHeader.getMetaDataInInputOrder());
    newHeaderLines.addAll(getDefaultToolVCFHeaderLines());

    loadSampleNameMappings();
    final List<String> sampleNamesOriginalOrdering = new ArrayList<>(sampleNameMapFromGenomicsDBImport.keySet());
    final int sampleCount = sampleNamesOriginalOrdering.size();
    if (sampleCount <= batchSize) {
        throw new SampleNameFixingCannotProceedException("you are not affected by the sample name ordering bug if your batch size is >= the number of samples in your callset. \n"
                                        + "batch size: " + batchSize + "\n"
                                        + "number of samples: " + sampleCount);
    }
    assertSampleNamesMatchInputVCF(originalHeader.getSampleNamesInOrder(), sampleNamesOriginalOrdering);

    final List<String> batchSortedSampleNames = getBatchSortedList();
    final VCFHeader remappedHeader = new VCFHeader(newHeaderLines, batchSortedSampleNames);

    logger.info("Writing the new header with corrected sample names");
    writer = createVCFWriter(output);
    writer.writeHeader(remappedHeader);
    logger.info("Copying the rest of the VCF");
}
 
Example 9
Source File: AnnotateStrelkaWithAllelicDepth.java    From hmftools with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Creates an output header that copies the template's metadata lines (in input order) and
 * genotype samples, and adds the standard AD format line.
 *
 * @param template header to copy from
 * @return the newly built header
 */
@NotNull
private VCFHeader generateOutputHeader(@NotNull final VCFHeader template) {
    final VCFHeader result = new VCFHeader(template.getMetaDataInInputOrder(), template.getGenotypeSamples());

    final String allelicDepthKey = "AD";
    result.addMetaDataLine(VCFStandardHeaderLines.getFormatLine(allelicDepthKey));

    return result;
}
 
Example 10
Source File: GenomeWarpSerial.java    From genomewarp with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the header of the warped VCF from the input header.
 *
 * <p>Input metadata lines are processed in input order:
 * <ul>
 *   <li>{@code reference} is rewritten to {@code ARGS.refTargetFASTA};</li>
 *   <li>{@code source} gets {@code "_and_" + GENOME_WARP_VERSION} appended;</li>
 *   <li>{@code fileformat} is not copied, only parsed to pick the version used for contig lines;</li>
 *   <li>contig lines are dropped and replaced by one line per entry of {@code namesAndLength};</li>
 *   <li>all other lines are copied unchanged.</li>
 * </ul>
 * A {@code source} line is added if the input had none.
 *
 * @param in             the input header
 * @param namesAndLength contig names mapped to their lengths; contig indices follow the map's
 *                       iteration order
 * @return a new header with the rewritten lines and the input's sample names in order
 * @throws IllegalArgumentException if the {@code fileformat} value is not a known VCF version
 */
private static VCFHeader warpHeader(VCFHeader in, Map<String, Long> namesAndLength)
    throws IllegalArgumentException {
  // Insertion-ordered so that the lines keep the input order they were read in; a plain
  // HashSet would hand them to the new header in arbitrary order.
  Set<VCFHeaderLine> newLines = new java.util.LinkedHashSet<>();
  boolean hasSource = false;
  VCFHeaderVersion version = DEFAULT_VCF_VERSION;

  for (VCFHeaderLine line : in.getMetaDataInInputOrder()) {
    final String key = line.getKey();
    if ("reference".equals(key)) {
      newLines.add(new VCFHeaderLine(key, ARGS.refTargetFASTA));
    } else if ("source".equals(key)) {
      newLines.add(new VCFHeaderLine(key, line.getValue() + "_and_"
          + GENOME_WARP_VERSION));
      hasSource = true;
    } else if ("fileformat".equals(key)) {
      version = VCFHeaderVersion.toHeaderVersion(line.getValue());
      if (version == null) {
        throw new IllegalArgumentException("malformed version: " + line.getValue());
      }
    } else if (!VCFConstants.CONTIG_HEADER_KEY.equals(key)) {
      // Contig lines of the input are skipped; they are regenerated below.
      newLines.add(line);
    }
  }

  if (!hasSource) {
    newLines.add(new VCFHeaderLine("source", GENOME_WARP_VERSION));
  }

  // Add contigs
  int contigIndex = 0;
  for (Map.Entry<String, Long> entry : namesAndLength.entrySet()) {
    String currName = entry.getKey();
    long chrSize = entry.getValue();

    newLines.add(new VCFContigHeaderLine(VCFHeaderLine.toStringEncoding(
        createContigEntry(currName, chrSize, ARGS.targetAssembly, ARGS.species)),
        version, VCFConstants.CONTIG_HEADER_KEY, contigIndex++));
  }

  return new VCFHeader(newLines, in.getSampleNamesInOrder());
}
 
Example 11
Source File: MakeSitesOnlyVcf.java    From picard with MIT License 4 votes vote down vote up
/**
 * Writes a copy of {@code INPUT} to {@code OUTPUT} in which every record has been passed through
 * {@code subsetToSamplesWithOriginalAnnotations} with the {@code SAMPLE} set, and whose header
 * lists {@code SAMPLE} as its samples.
 *
 * <p>The reader, writer and record iterator are managed by try-with-resources so that they are
 * released even when validation or copying fails.
 *
 * @return 0 on success
 * @throws PicardException if an index is requested but the input header has no sequence dictionary
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    try (final VCFFileReader reader = new VCFFileReader(INPUT, false)) {
        final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
        final SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary();

        if (CREATE_INDEX && sequenceDictionary == null) {
            throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
        }

        final ProgressLogger progress = new ProgressLogger(Log.getInstance(MakeSitesOnlyVcf.class), 10000);

        // Setup the site-only file writer
        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                .setOutputFile(OUTPUT)
                .setReferenceDictionary(sequenceDictionary);
        if (CREATE_INDEX) {
            builder.setOption(Options.INDEX_ON_THE_FLY);
        } else {
            builder.unsetOption(Options.INDEX_ON_THE_FLY);
        }

        try (final VariantContextWriter writer = builder.build()) {
            final VCFHeader header = new VCFHeader(inputVcfHeader.getMetaDataInInputOrder(), SAMPLE);
            writer.writeHeader(header);

            // Go through the input, strip the records and write them to the output
            try (final CloseableIterator<VariantContext> iterator = reader.iterator()) {
                while (iterator.hasNext()) {
                    final VariantContext full = iterator.next();
                    final VariantContext site = subsetToSamplesWithOriginalAnnotations(full, SAMPLE);
                    writer.add(site);
                    progress.record(site.getContig(), site.getStart());
                }
            }
        }
    }

    return 0;
}
 
Example 12
Source File: GnarlyGenotyper.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Creates the output writer(s) and writes their headers.
 *
 * <p>The header lines start from the input header's metadata (in input order) plus the default
 * tool lines, with every line whose key starts with {@code GVCFWriter.GVCF_BLOCK} removed. The
 * filter, info and format lines listed in the body are then added. The main writer's header
 * carries the samples sorted via a {@code TreeSet}; the annotation database writer, created only
 * when {@code outputDbName} is set, gets a header built from the same lines without samples.
 *
 * @param inputVCFHeader header of the input, used as the base of the output header lines
 * @param samples        samples to list in the main output header
 */
private void setupVCFWriter(VCFHeader inputVCFHeader, SampleList samples) {
    final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputVCFHeader.getMetaDataInInputOrder());
    headerLines.addAll(getDefaultToolVCFHeaderLines());

    // Remove GVCF block lines
    headerLines.removeIf(vcfHeaderLine -> vcfHeaderLine.getKey().startsWith(GVCFWriter.GVCF_BLOCK));

    //add header for new filter
    headerLines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_QUAL_FILTER_NAME));

    // add headers for annotations added by this tool
    headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY));
    headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY));
    headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AC_ADJUSTED_KEY));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.FISHER_STRAND_KEY));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.STRAND_ODDS_RATIO_KEY));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.SB_TABLE_KEY));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.EXCESS_HET_KEY));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.QUAL_BY_DEPTH_KEY));
    headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.RMS_MAPPING_QUALITY_KEY));
    headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));   // needed for gVCFs without DP tags
    if (inputVCFHeader.hasInfoLine(GATKVCFConstants.AS_QUAL_KEY) || inputVCFHeader.hasInfoLine(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY)) {  //use this as a proxy for all AS headers
        headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_ALT_ALLELE_DEPTH_KEY));
        headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_BASE_QUAL_RANK_SUM_KEY));
        headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_FISHER_STRAND_KEY));
        headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_RMS_MAPPING_QUALITY_KEY));
        headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_MAP_QUAL_RANK_SUM_KEY));
        headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_QUAL_BY_DEPTH_KEY));
        headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_READ_POS_RANK_SUM_KEY));
        headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_STRAND_ODDS_RATIO_KEY));
    }

    if ( dbsnp.dbsnp != null  ) {
        VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY);
    }

    vcfWriter = createVCFWriter(outputFile);
    if (outputDbName != null) {
        annotationDatabaseWriter = createVCFWriter(new File(outputDbName));
    }

    final Set<String> sampleNameSet = samples.asSetOfSamples();
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.RAW_GENOTYPE_COUNT_KEY));
    if (SUMMARIZE_PLs) {
        headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.REFERENCE_GENOTYPE_QUALITY));
        headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.GENOTYPE_QUALITY_BY_ALLELE_BALANCE));
        headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.GENOTYPE_QUALITY_BY_ALT_CONFIDENCE));
    }

    final VCFHeader vcfHeader = new VCFHeader(headerLines, new TreeSet<>(sampleNameSet));
    vcfWriter.writeHeader(vcfHeader);

    // The sample-less database header is only needed when the database writer exists.
    if (outputDbName != null) {
        annotationDatabaseWriter.writeHeader(new VCFHeader(headerLines));
    }
}