htsjdk.tribble.TribbleException Java Examples

The following examples show how to use htsjdk.tribble.TribbleException. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: GermlineVcfParser.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Opens the given VCF file and hands the reader to the program for processing.
 *
 * @param sampleId sample identifier passed through to the program
 * @param vcf      VCF file to read; may be null
 * @return true when the file was processed; false when {@code vcf} is null or a
 *         TribbleException was raised while opening, processing or closing the file
 */
private boolean processVCF(final String sampleId, final File vcf)
{
    if(vcf == null)
        return false;

    final String vcfPath = vcf.getPath();
    LOGGER.debug("Processing vcf: {}", vcfPath);

    // the same flag is handed to both the reader and the program
    final boolean notSkippingIndex = !mSkipIndexFile;

    try (final VCFFileReader vcfReader = new VCFFileReader(vcf, notSkippingIndex))
    {
        mProgram.processVcfFile(sampleId, vcfReader, notSkippingIndex);
        return true;
    }
    catch (final TribbleException e)
    {
        LOGGER.error("Error with VCF file {}: {}", vcfPath, e.getMessage());
        return false;
    }
}
 
Example #2
Source File: VcfToVariant.java    From genomewarp with Apache License 2.0 6 votes vote down vote up
/**
 * Checks whether the VCF file declares a format version older than 4.2.
 *
 * <p>Only the first line of the file is read; it must be the version header.
 *
 * @param filepath the VCF file to inspect
 * @return true if the declared version is older than 4.2, false if it is 4.2 or newer
 * @throws IOException if the file cannot be read, is empty, or its first line is not a
 *         valid version header
 */
private static boolean validVersion(File filepath) throws IOException {
  final String firstLine;
  // try-with-resources releases the reader even when readLine() fails
  try (BufferedReader reader = Files.newBufferedReader(filepath.toPath(), UTF_8)) {
    // The first line must be the header
    firstLine = reader.readLine();
  }

  // readLine() returns null for an empty file; report it instead of passing null to the parser
  if (firstLine == null) {
    throw new IOException("File is empty, no VCF version header found: " + filepath);
  }

  try {
    VCFHeaderVersion version = VCFHeaderVersion.getHeaderVersion(firstLine);

    // If the version is greater than or equal to 4.2, we cannot handle it
    return !version.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_2);
  } catch (TribbleException.InvalidHeader msg) {
    throw new IOException(msg);
  }
}
 
Example #3
Source File: ConvertBedToTargetFile.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Converts the input BED file into a target file.
 *
 * @return the string {@code "SUCCESS"} once the targets have been written
 * @throws UserException.BadInput if the codec selected for the input does not produce BED
 *         features, or if a TribbleException is raised while the features are read or written
 */
@Override
protected Object doWork() {
    final FeatureCodec<? extends Feature, ?> codec = FeatureManager.getCodecForFile(inputBedFile);
    final Class<? extends Feature> featureType = codec.getFeatureType();
    if (BEDFeature.class.isAssignableFrom(featureType)) {
        // The data source is closed once the targets are collected and written; previously it was never closed.
        // The inner try keeps the catch scoped to reading/writing only, exactly as before.
        try (final FeatureDataSource<? extends BEDFeature> source = new FeatureDataSource<>(inputBedFile)) {
            try {
                final List<Target> targets = StreamSupport.stream(source.spliterator(), false).map(ConvertBedToTargetFile::createTargetFromBEDFeature)
                        .collect(Collectors.toList());
                TargetWriter.writeTargetsToFile(outFile, targets);
            } catch (final TribbleException e) {
                throw new UserException.BadInput(String.format("'%s' has a .bed extension but does not seem to be a valid BED file.", inputBedFile.getAbsolutePath()));
            }
        }
    } else {
        throw new UserException.BadInput(String.format("'%s' does not seem to be a BED file.", inputBedFile.getAbsolutePath()));
    }
    return "SUCCESS";
}
 
Example #4
Source File: FeatureDataSource.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Builds a Tribble feature reader for the given feature input without requiring an index.
 *
 * @param featureInput      the input whose raw input string is opened by the reader
 * @param codec             codec handed to the reader factory; must not be null
 * @param cloudWrapper      channel wrapper passed to the factory when the input is eligible for prefetching
 * @param cloudIndexWrapper index channel wrapper passed to the factory when the input is eligible for prefetching
 * @return the feature reader created by the factory
 * @throws GATKException if a TribbleException is raised while the reader is being created
 */
private static <T extends Feature> AbstractFeatureReader<T, ?> getTribbleFeatureReader(final FeatureInput<T> featureInput, final FeatureCodec<T, ?> codec, final Function<SeekableByteChannel, SeekableByteChannel> cloudWrapper, final Function<SeekableByteChannel, SeekableByteChannel> cloudIndexWrapper) {
    Utils.nonNull(codec);
    try {
        // The path to the data file has to be obtained here.
        final String rawPath = featureInput.getRawInputString();

        // The factory is told not to demand an index; one is required by us later,
        // as soon as a query by interval is attempted.
        final boolean indexRequired = false;

        // Inputs that would not benefit from prefetching are read through identity wrappers.
        if (!BucketUtils.isEligibleForPrefetching(featureInput)) {
            return AbstractFeatureReader.getFeatureReader(rawPath, null, codec, indexRequired, Utils.identityFunction(), Utils.identityFunction());
        }

        // Remote inputs eligible for prefetching get the caller-supplied wrappers.
        return AbstractFeatureReader.getFeatureReader(rawPath, null, codec, indexRequired, cloudWrapper, cloudIndexWrapper);
    } catch (final TribbleException e) {
        throw new GATKException("Error initializing feature reader for path " + featureInput.getFeaturePath(), e);
    }
}
 
Example #5
Source File: GATKVariantContextUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Determines the common reference allele
 *
 * @param VCs    the list of VariantContexts
 * @param loc    if not null, ignore records that do not begin at this start location
 * @return possibly null Allele
 * @throws TribbleException if two matching records carry reference alleles that cannot be
 *         reconciled; the underlying exception is attached as the cause
 */
public static Allele determineReferenceAllele(final List<VariantContext> VCs, final Locatable loc) {
    Allele ref = null;

    for ( final VariantContext vc : VCs ) {
        if ( contextMatchesLoc(vc, loc) ) {
            final Allele myRef = vc.getReference();
            try {
                ref = determineReferenceAllele(ref, myRef);
            } catch (TribbleException e) {
                // Rethrow with positional context, keeping the original exception as the cause
                // so its stack trace is not lost.
                throw new TribbleException(String.format("The provided variant file(s) have inconsistent references " +
                        "for the same position(s) at %s:%d, %s vs. %s", vc.getContig(), vc.getStart(), ref, myRef), e);
            }
        }
    }
    return ref;
}
 
Example #6
Source File: RefSeqCodec.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Decodes only the location of the next line supplied by the iterator.
 *
 * @param lineIterator iterator from which exactly one line is consumed
 * @return a feature covering the interval described by the line, or null for a comment line
 * @throws TribbleException if the line has fewer than MINIMUM_LINE_FIELD_COUNT fields
 * @throws UserException.MalformedFile if either interval bound is not a parsable integer
 */
@Override
public Feature decodeLoc(final LineIterator lineIterator) {
    final String record = lineIterator.next();
    if (record.startsWith(COMMENT_LINE_CHARACTER)) {
        return null;
    }

    final String[] columns = record.split(LINE_DELIMITER);
    if (columns.length < MINIMUM_LINE_FIELD_COUNT) {
        throw new TribbleException("RefSeq (decodeLoc) : Unable to parse line -> " + record + ", we expected at least 16 columns, we saw " + columns.length);
    }

    final String contig = columns[CONTIG_INDEX];
    try {
        // the left bound is shifted by one before the interval is built
        final int start = Integer.parseInt(columns[INTERVAL_LEFT_BOUND_INDEX]) + 1;
        final int end = Integer.parseInt(columns[INTERVAL_RIGHT_BOUND_INDEX]);
        return new RefSeqFeature(new SimpleInterval(contig, start, end));
    } catch (NumberFormatException e) {
        //TODO maybe also catch malformed simple intervals? Genome locs had that
        throw new UserException.MalformedFile("Could not parse location from line: " + record);
    }
}
 
Example #7
Source File: MakeVcfSampleNameMap.java    From picard with MIT License 5 votes vote down vote up
/**
 * Opens a VCF feature reader on the given path, without requiring an index.
 *
 * @param variantPath path to the variant file; it is converted to an absolute URI string
 * @return a feature reader decoding the file with a {@code VCFCodec}
 * @throws PicardException if a TribbleException is raised while the reader is created
 */
private static AbstractFeatureReader<VariantContext, LineIterator> getReaderFromPath(final Path variantPath) {
    final Path absolutePath = variantPath.toAbsolutePath();
    final String uri = absolutePath.toUri().toString();
    try {
        final VCFCodec vcfCodec = new VCFCodec();
        return AbstractFeatureReader.getFeatureReader(uri, null, vcfCodec, false, Function.identity(), Function.identity());
    } catch (final TribbleException e) {
        throw new PicardException("Failed to create reader from " + uri, e);
    }
}
 
Example #8
Source File: GATKVariantContextUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Picks the reference allele to use out of two candidates: the longer one, or the shared
 * allele when both are equal.
 *
 * @param ref1 first candidate; may be null
 * @param ref2 second candidate; may be null
 * @return {@code ref2} when {@code ref1} is null, {@code ref1} when {@code ref2} is null,
 *         otherwise the longer allele, or {@code ref1} when both are equal
 * @throws TribbleException if the alleles have the same length but are not equal
 */
public static Allele determineReferenceAllele(final Allele ref1, final Allele ref2) {
    // Both null checks run before any length() call; previously a non-null ref1 with a
    // null ref2 dereferenced ref2 and failed with a NullPointerException.
    if ( ref1 == null ) {
        return ref2;
    }
    if ( ref2 == null ) {
        return ref1;
    }

    if ( ref1.length() < ref2.length() ) {
        return ref2;
    }
    if ( ref2.length() < ref1.length() ) {
        return ref1;
    }

    // from here on the lengths are the same
    if ( ! ref1.equals(ref2) ) {
        throw new TribbleException(String.format("The provided reference alleles do not appear to represent the same position, %s vs. %s", ref1, ref2));
    }
    // they are equal, so either one could be returned
    return ref1;
}
 
Example #9
Source File: VCFRecordReader.java    From Hadoop-BAM with MIT License 4 votes vote down vote up
/**
 * Advances to the next variant that decodes successfully and passes the overlap check,
 * then stores it as the current key/value pair.
 *
 * Lines starting with "#" are skipped. A line that fails to decode is rethrown under
 * STRICT stringency and skipped otherwise (with a warning under LENIENT).
 *
 * @return true when a key/value pair was set, false when the underlying reader is exhausted
 * @throws IOException declared by the contract of this method
 */
@Override public boolean nextKeyValue() throws IOException {
	for (;;) {
		// Pull lines until one that is not a "#" line turns up.
		String line;
		do {
			if (!lineRecordReader.nextKeyValue()) {
				return false;
			}
			line = lineRecordReader.getCurrentValue().toString();
		} while (line.startsWith("#"));

		final VariantContext v;
		try {
			v = codec.decode(line);
		} catch (TribbleException e) {
			if (stringency == ValidationStringency.STRICT) {
				if (logger.isErrorEnabled()) {
					logger.error("Parsing line {} failed with {}.", line, e);
				}
				throw e;
			}
			// Any other stringency skips the line; only LENIENT reports it.
			if (stringency == ValidationStringency.LENIENT && logger.isWarnEnabled()) {
				logger.warn("Parsing line {} failed with {}. Skipping...", line, e);
			}
			continue;
		}

		if (!overlaps(v)) {
			continue;
		}

		// Contigs missing from the dictionary get a hash of their name as index.
		Integer chromIdx = contigDict.get(v.getContig());
		if (chromIdx == null) {
			chromIdx = (int) MurmurHash3.murmurhash3(v.getContig(), 0);
		}

		// Contig index goes into the upper 32 bits, the start minus one is OR-ed in below.
		key.set((long) chromIdx << 32 | (long) (v.getStart() - 1));
		vc.set(v, header);

		return true;
	}
}
 
Example #10
Source File: TestVCFInputFormatStringency.java    From Hadoop-BAM with MIT License 4 votes vote down vote up
/** Reading with a null stringency is expected to fail with a TribbleException. */
@Test(expected = TribbleException.class)
public void testUnset() throws Exception {
    checkReading(null); // defaults to strict
}
 
Example #11
Source File: TestVCFInputFormatStringency.java    From Hadoop-BAM with MIT License 4 votes vote down vote up
/** Reading with the default validation stringency is expected to fail with a TribbleException. */
@Test(expected = TribbleException.class)
public void testDefault() throws Exception {
    // defaults to strict
    final ValidationStringency defaultStringency = ValidationStringency.DEFAULT_STRINGENCY;
    checkReading(defaultStringency);
}
 
Example #12
Source File: TestVCFInputFormatStringency.java    From Hadoop-BAM with MIT License 4 votes vote down vote up
/** Reading with STRICT validation stringency is expected to fail with a TribbleException. */
@Test(expected = TribbleException.class)
public void testStrict() throws Exception {
    final ValidationStringency strict = ValidationStringency.STRICT;
    checkReading(strict);
}
 
Example #13
Source File: RefSeqCodec.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Builds a RefSeq feature from one text line of a RefSeq (UCSC) text dump file.
 *
 * @param line the line to decode
 * @return the decoded feature, or null when the line is a comment or describes a transcript
 *         whose coding start lies after its coding stop
 * @throws TribbleException if the line has fewer than MINIMUM_LINE_FIELD_COUNT fields
 * @throws UserException.MalformedFile if the strand is neither "+" nor "-", or if the numbers
 *         of exon starts, exon stops and exon frames do not agree
 */
@Override
public RefSeqFeature decode(String line) {
    if (line.startsWith(COMMENT_LINE_CHARACTER)) {
        return null;
    }

    final String[] columns = line.split(LINE_DELIMITER);

    // position 15 of the split array is referenced below, so make sure there are enough columns
    if (columns.length < MINIMUM_LINE_FIELD_COUNT) {
        throw new TribbleException("RefSeq (decode) : Unable to parse line -> " + line + ", we expected at least 16 columns, we saw " + columns.length);
    }

    final String contig = columns[CONTIG_INDEX];
    final int transcriptStart = Integer.parseInt(columns[INTERVAL_LEFT_BOUND_INDEX]) + 1;
    final int transcriptStop = Integer.parseInt(columns[INTERVAL_RIGHT_BOUND_INDEX]);
    final RefSeqFeature result = new RefSeqFeature(new SimpleInterval(contig, transcriptStart, transcriptStop));

    result.setTranscript_id(columns[TRANSCRIPT_ID_INDEX]);

    final String strand = columns[STRAND_INDEX];
    if ("+".equals(strand)) {
        result.setStrand(1);
    } else if ("-".equals(strand)) {
        result.setStrand(-1);
    } else {
        throw new UserException.MalformedFile("Expected strand symbol (+/-), found: " + strand + " for line=" + line);
    }

    final int codingStart = Integer.parseInt(columns[CODING_START_INDEX]) + 1;
    final int codingStop = Integer.parseInt(columns[CODING_STOP_INDEX]);

    if (codingStart > codingStop) {
        // such transcripts are dropped; the user is told about it a single time
        if (!zero_coding_length_user_warned) {
            Utils.warnUser("RefSeq file contains transcripts with zero coding length. " +
                    "Such transcripts will be ignored (this warning is printed only once)");
            zero_coding_length_user_warned = true;
        }
        return null;
    }

    result.setTranscript_interval(new SimpleInterval(contig, transcriptStart, transcriptStop));
    result.setTranscript_coding_interval(new SimpleInterval(contig, codingStart, codingStop));
    result.setGene_name(columns[GENE_NAME_INDEX]);

    final String[] exonStartTokens = columns[EXON_STARTS_INDEX].split(",");
    final String[] exonStopTokens = columns[EXON_STOPS_INDEX].split(",");
    final String[] frameTokens = columns[EXON_FRAMES_INDEX].split(",");

    final int exonCount = exonStartTokens.length;
    if (exonCount != exonStopTokens.length) {
        throw new UserException.MalformedFile("Data format error: numbers of exon start and stop positions differ for line=" + line);
    }
    if (exonCount != frameTokens.length) {
        throw new UserException.MalformedFile("Data format error: numbers of exons and exon frameshifts differ for line=" + line);
    }

    final ArrayList<SimpleInterval> exonIntervals = new ArrayList<>(exonCount);
    final ArrayList<Integer> frames = new ArrayList<>(frameTokens.length);

    for (int idx = 0; idx < exonCount; idx++) {
        // NOTE: 1 is added to the start to account for representation issues in the exon counts
        final int exonStart = Integer.parseInt(exonStartTokens[idx]) + 1;
        final int exonStop = Integer.parseInt(exonStopTokens[idx]);
        exonIntervals.add(new SimpleInterval(contig, exonStart, exonStop));
        frames.add(Integer.decode(frameTokens[idx]));
    }

    result.setExons(exonIntervals);
    result.setExon_frames(frames);
    return result;
}
 
Example #14
Source File: ValidateVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Validates a single variant record against every configured validation type.
 *
 * When GVCF validation is enabled, the record's order is checked, its interval is added to
 * the sorted interval set (joined with the previous interval when the two overlap within a
 * margin of 1), the previous-interval and previous-start state is updated, and the
 * GVCF-specific validation is run.
 *
 * @param vc             the variant to validate
 * @param readsContext   reads context of the record (not used here)
 * @param ref            reference context supplying the reference bases and interval
 * @param featureContext feature context from which the rsIDs are obtained
 */
@Override
public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext ref, final FeatureContext featureContext) {
    if (DO_NOT_VALIDATE_FILTERED && vc.isFiltered()) {
        return;
    }

    // the reference allele as reported by the record
    final Allele reportedRefAllele = vc.getReference();
    final int refLength = reportedRefAllele.length();

    // the true reference allele, when a reference is available
    final Allele observedRefAllele;
    if (hasReference()) {
        observedRefAllele = Allele.create(Arrays.copyOf(ref.getBases(), refLength));
    } else {
        observedRefAllele = null;
    }

    final Set<String> rsIDs = getRSIDs(featureContext);

    if (VALIDATE_GVCF) {
        final SimpleInterval refInterval = ref.getInterval();

        validateVariantsOrder(vc);

        // GenomeLocSortedSet will automatically merge intervals that are overlapping when setting
        // `mergeIfIntervalOverlaps` to true.  In a GVCF most blocks are adjacent to each other so they
        // wouldn't normally get merged.  We check if the current record is adjacent to the previous record
        // and "overlap" them if they are so our set is as small as possible while still containing the
        // same bases.
        final boolean touchesPrevious = previousInterval != null && previousInterval.overlapsWithMargin(refInterval, 1);
        final int start;
        final int end;
        if (touchesPrevious) {
            start = previousInterval.getStart();
            end = Math.max(previousInterval.getEnd(), vc.getEnd());
        } else {
            start = refInterval.getStart();
            end = vc.getEnd();
        }
        final GenomeLoc possiblyMergedGenomeLoc = genomeLocSortedSet.getGenomeLocParser().createGenomeLoc(refInterval.getContig(), start, end);
        genomeLocSortedSet.add(possiblyMergedGenomeLoc, true);

        previousInterval = new SimpleInterval(possiblyMergedGenomeLoc);
        previousStart = vc.getStart();
        validateGVCFVariant(vc);
    }

    for (final ValidationType type : validationTypes) {
        try {
            applyValidationType(vc, reportedRefAllele, observedRefAllele, rsIDs, type);
        } catch (TribbleException e) {
            // a failed validation is either thrown or reported as a warning by throwOrWarn
            throwOrWarn(new UserException.FailsStrictValidation(drivingVariantFile, type, e.getMessage()));
        }
    }
}