Java Code Examples for htsjdk.tribble.CloseableTribbleIterator#next()

The following examples show how to use htsjdk.tribble.CloseableTribbleIterator#next() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: LongISLNDReadAlignmentMap.java    From varsim with BSD 2-Clause "Simplified" License 5 votes vote down vote up
public LongISLNDReadAlignmentMap(final Collection<String> readAlignmentMapFiles) throws IOException {
    for (final String readAlignmentMapFileName : readAlignmentMapFiles) {
        log.info("Reading in read map from " + readAlignmentMapFileName);
        final AbstractFeatureReader<BEDFeature, LineIterator> featureReader = AbstractFeatureReader.getFeatureReader(readAlignmentMapFileName, new BEDCodec(), false);
        try {
            final CloseableTribbleIterator<BEDFeature> featureIterator = featureReader.iterator();
            while (featureIterator.hasNext()) {
                final BEDFeature feature = featureIterator.next();
                readAlignmentMap.put(feature.getName(), new LongISLNDReadMapRecord(feature).toReadMapRecord());
            }
        } finally {
            featureReader.close();
        }
    }
}
 
Example 2
Source File: BedToIntervalList.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);
    try {
        // create a new header that we will assign the dictionary provided by the SAMSequenceDictionaryExtractor to.
        final SAMFileHeader header = new SAMFileHeader();
        final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath());
        header.setSequenceDictionary(samSequenceDictionary);
        // set the sort order to be sorted by coordinate, which is actually done below
        // by getting the .uniqued() intervals list before we write out the file
        header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        final IntervalList intervalList = new IntervalList(header);

        final FeatureReader<BEDFeature> bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(), false);
        final CloseableTribbleIterator<BEDFeature> iterator = bedReader.iterator();
        final ProgressLogger progressLogger = new ProgressLogger(LOG, (int) 1e6);

        while (iterator.hasNext()) {
            final BEDFeature bedFeature = iterator.next();
            final String sequenceName = bedFeature.getContig();
            final int start = bedFeature.getStart();
            final int end = bedFeature.getEnd();
            // NB: do not use an empty name within an interval
            final String name;
            if (bedFeature.getName().isEmpty()) {
                name = null;
            } else {
                name = bedFeature.getName();
            }

            final SAMSequenceRecord sequenceRecord = header.getSequenceDictionary().getSequence(sequenceName);

            // Do some validation
            if (null == sequenceRecord) {
                if (DROP_MISSING_CONTIGS) {
                    LOG.info(String.format("Dropping interval with missing contig: %s:%d-%d", sequenceName, bedFeature.getStart(), bedFeature.getEnd()));
                    missingIntervals++;
                    missingRegion += bedFeature.getEnd() - bedFeature.getStart();
                    continue;
                }
                throw new PicardException(String.format("Sequence '%s' was not found in the sequence dictionary", sequenceName));
            } else if (start < 1) {
                throw new PicardException(String.format("Start on sequence '%s' was less than one: %d", sequenceName, start));
            } else if (sequenceRecord.getSequenceLength() < start) {
                throw new PicardException(String.format("Start on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), start));
            } else if ((end == 0 && start != 1 ) //special case for 0-length interval at the start of a contig
                    || end < 0 ) {
                throw new PicardException(String.format("End on sequence '%s' was less than one: %d", sequenceName, end));
            } else if (sequenceRecord.getSequenceLength() < end) {
                throw new PicardException(String.format("End on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), end));
            } else if (end < start - 1) {
                throw new PicardException(String.format("On sequence '%s', end < start-1: %d <= %d", sequenceName, end, start));
            }

            final boolean isNegativeStrand = bedFeature.getStrand() == Strand.NEGATIVE;
            final Interval interval = new Interval(sequenceName, start, end, isNegativeStrand, name);
            intervalList.add(interval);

            progressLogger.record(sequenceName, start);
        }
        CloserUtil.close(bedReader);

        if (DROP_MISSING_CONTIGS) {
            if (missingRegion == 0) {
                LOG.info("There were no missing regions.");
            } else {
                LOG.warn(String.format("There were %d missing regions with a total of %d bases", missingIntervals, missingRegion));
            }
        }
        // Sort and write the output
        IntervalList out = intervalList;
        if (SORT) {
            out = out.sorted();
        }
        if (UNIQUE) {
            out = out.uniqued();
        }
        out.write(OUTPUT);
        LOG.info(String.format("Wrote %d intervals spanning a total of %d bases", out.getIntervals().size(),out.getBaseCount()));

    } catch (final IOException e) {
        throw new RuntimeException(e);
    }

    return 0;
}
 
Example 3
Source File: GencodeFuncotationFactoryUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Test (dataProvider = "provideMuc16SnpDataForGetVariantClassification")
void testGetVariantClassificationForCodingRegions(final int chromosomeNumber,
                                  final int start,
                                  final int end,
                                  final GencodeFuncotation.VariantType variantType,
                                  final String ref,
                                  final String alt,
                                  final GencodeFuncotation.VariantClassification expectedVariantClassification) {

    // This test can only deal with variants in coding regions.
    // So we ignore any tests that are expected outside of coding regions.
    // i.e. expectedVariantClassification is one of:
    //     { INTRON, FIVE_PRIME_UTR, THREE_PRIME_UTR, IGR, FIVE_PRIME_FLANK, DE_NOVO_START_IN_FRAME, DE_NOVO_START_OUT_FRAME, RNA, LINCRNA }
    // We test these cases in another unit test.
    if ((expectedVariantClassification == GencodeFuncotation.VariantClassification.INTRON) ||
        (expectedVariantClassification == GencodeFuncotation.VariantClassification.FIVE_PRIME_UTR) ||
        (expectedVariantClassification == GencodeFuncotation.VariantClassification.THREE_PRIME_UTR) ||
        (expectedVariantClassification == GencodeFuncotation.VariantClassification.IGR) ||
        (expectedVariantClassification == GencodeFuncotation.VariantClassification.FIVE_PRIME_FLANK) ||
        (expectedVariantClassification == GencodeFuncotation.VariantClassification.DE_NOVO_START_IN_FRAME) ||
        (expectedVariantClassification == GencodeFuncotation.VariantClassification.DE_NOVO_START_OUT_FRAME) ||
        (expectedVariantClassification == GencodeFuncotation.VariantClassification.RNA) ||
        (expectedVariantClassification == GencodeFuncotation.VariantClassification.LINCRNA) )
    {
        return;
    }

    final String contig = "chr" + Integer.toString(chromosomeNumber);
    final SimpleInterval variantInterval = new SimpleInterval( contig, start, end );

    final Allele refAllele = Allele.create(ref, true);
    final Allele altAllele = Allele.create(alt);

    final VariantContextBuilder variantContextBuilder = new VariantContextBuilder(
            FuncotatorReferenceTestUtils.retrieveHg19Chr19Ref(),
            contig,
            start,
            end,
            Arrays.asList(refAllele, altAllele)
    );
    final VariantContext variantContext = variantContextBuilder.make();

    // Get our gene feature iterator:
    final CloseableTribbleIterator<GencodeGtfFeature> gtfFeatureIterator;
    try {
        gtfFeatureIterator = gencodeHg19FeatureReader.query(contig, start, end);
    }
    catch (final IOException ex) {
        throw new GATKException("Could not finish the test!", ex);
    }

    // Get the gene.
    // We know the first gene is the right one - the gene in question is the MUC16 gene:
    final GencodeGtfGeneFeature             gene = (GencodeGtfGeneFeature) gtfFeatureIterator.next();
    final GencodeGtfTranscriptFeature transcript = getMuc16Transcript(gene);
    final GencodeGtfExonFeature             exon = getExonForVariant( gene, variantInterval );

    final ReferenceContext referenceContext = new ReferenceContext( refDataSourceHg19Ch19, variantInterval );

    final List<? extends Locatable> exonPositionList = GencodeFuncotationFactory.getSortedCdsAndStartStopPositions(transcript);

    final ReferenceDataSource muc16TranscriptDataSource = ReferenceDataSource.of(new File(FuncotatorTestConstants.GENCODE_DATA_SOURCE_FASTA_PATH_HG19).toPath());
    final Map<String, GencodeFuncotationFactory.MappedTranscriptIdInfo> muc16TranscriptIdMap = GencodeFuncotationFactory. createTranscriptIdMap(muc16TranscriptDataSource);

    final SequenceComparison seqComp =
            GencodeFuncotationFactory.createSequenceComparison(
                    variantContext,
                    altAllele,
                    referenceContext,
                    transcript,
                    exonPositionList,
                    muc16TranscriptIdMap,
                    muc16TranscriptDataSource,
                    true);

    final GencodeFuncotation.VariantClassification varClass = GencodeFuncotationFactory.createVariantClassification(
            variantContext,
            altAllele,
            variantType,
            exon,
            transcript.getExons().size(),
            seqComp
    );

    Assert.assertEquals( varClass, expectedVariantClassification );
}
 
Example 4
Source File: GencodeFuncotationFactoryUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Test (dataProvider = "provideMuc16SnpDataForGetVariantClassificationWithOutOfCdsData")
void testMuc16SnpCreateFuncotations(final int chromosomeNumber,
                                    final int start,
                                    final int end,
                                    final GencodeFuncotation.VariantType expectedVariantType,
                                    final String ref,
                                    final String alt,
                                    final GencodeFuncotation.VariantClassification expectedVariantClassification) {

    final String contig = "chr" + Integer.toString(chromosomeNumber);
    final SimpleInterval variantInterval = new SimpleInterval( contig, start, end );

    final Allele refAllele = Allele.create(ref, true);
    final Allele altAllele = Allele.create(alt);

    final VariantContextBuilder variantContextBuilder = new VariantContextBuilder(
            FuncotatorReferenceTestUtils.retrieveHg19Chr19Ref(),
            contig,
            start,
            end,
            Arrays.asList(refAllele, altAllele)
    );
    final VariantContext variantContext = variantContextBuilder.make();

    // Get our gene feature iterator:
    final CloseableTribbleIterator<GencodeGtfFeature> gtfFeatureIterator;
    try {
        gtfFeatureIterator = gencodeHg19FeatureReader.query(contig, start, end);
    }
    catch (final IOException ex) {
        throw new GATKException("Could not finish the test!", ex);
    }

    // Get the gene.
    // We know the first gene is the right one - the gene in question is the MUC16 gene:
    final GencodeGtfGeneFeature gene = (GencodeGtfGeneFeature) gtfFeatureIterator.next();
    final ReferenceContext referenceContext = new ReferenceContext(refDataSourceHg19Ch19, variantInterval );
    final Set<String> requestedTranscriptIds = getValidTranscriptsForGene("MUC16");

    // Create a factory for our funcotations:
    try (final GencodeFuncotationFactory funcotationFactory = new GencodeFuncotationFactory(
            IOUtils.getPath(FuncotatorTestConstants.GENCODE_DATA_SOURCE_FASTA_PATH_HG19),
            "VERSION",
            GencodeFuncotationFactory.DEFAULT_NAME,
            FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE,
            requestedTranscriptIds,
            new LinkedHashMap<>(), createFeatureInputForMuc16Ds(GencodeFuncotationFactory.DEFAULT_NAME), "HG19")) {

        // Generate our funcotations:
        final List<Feature> featureList = new ArrayList<>();
        featureList.add( gene );
        final List<Funcotation> funcotations = funcotationFactory.createFuncotationsOnVariant(variantContext, referenceContext, featureList);

        // Make sure we get what we expected:
        Assert.assertEquals(funcotations.size(), 1);

        final GencodeFuncotation funcotation = (GencodeFuncotation)funcotations.get(0);

        Assert.assertEquals(funcotation.getVariantClassification(), expectedVariantClassification);
        Assert.assertEquals(funcotation.getVariantType(), expectedVariantType);
    }
}
 
Example 5
Source File: GencodeFuncotationFactoryUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Test(dataProvider = "provideForCreateNonBasicFuncotations")
void createNonBasicFuncotations(final int start, final int end) {

    final String contig = "chr19";

    final SimpleInterval variantInterval = new SimpleInterval( contig, start, end );

    final Allele refAllele = Allele.create("C", true);
    final Allele altAllele = Allele.create("G", false);

    final VariantContextBuilder variantContextBuilder = new VariantContextBuilder(
            FuncotatorReferenceTestUtils.retrieveHg19Chr19Ref(),
            contig,
            start,
            end,
            Arrays.asList(refAllele, altAllele)
    );
    final VariantContext variantContext = variantContextBuilder.make();

    // Get our gene feature iterator:
    final CloseableTribbleIterator<GencodeGtfFeature> gtfFeatureIterator;
    try {
        gtfFeatureIterator = muc16NonBasicFeatureReader.query(contig, start, end);
    }
    catch (final IOException ex) {
        throw new GATKException("Could not finish the test!", ex);
    }

    // Get the gene.
    // We know the first gene is the right one - the gene in question is the MUC16 gene:
    final GencodeGtfGeneFeature gene = (GencodeGtfGeneFeature) gtfFeatureIterator.next();
    final ReferenceContext referenceContext = new ReferenceContext(refDataSourceHg19Ch19, variantInterval );

    // Create a factory for our funcotations:
    try (final GencodeFuncotationFactory funcotationFactory = new GencodeFuncotationFactory(
                                                                IOUtils.getPath(FuncotatorTestConstants.GENCODE_DATA_SOURCE_FASTA_PATH_HG19),
                                                                "VERSION",
                                                                GencodeFuncotationFactory.DEFAULT_NAME,
                                                                FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE,
                                                                new HashSet<>(),
                                                                new LinkedHashMap<>(),
                                                                createFeatureInputForMuc16Ds(GencodeFuncotationFactory.DEFAULT_NAME),
                                                                "HG19")) {

        // Generate our funcotations:
        final List<Feature> featureList = new ArrayList<>();
        featureList.add( gene );
        final List<Funcotation> funcotations = funcotationFactory.createFuncotationsOnVariant(variantContext, referenceContext, featureList);

        // Make sure we get what we expected (a single IGR funcotation):
        Assert.assertEquals(funcotations.size(), 1);
        Assert.assertTrue(funcotations.get(0) instanceof GencodeFuncotation);

        final GencodeFuncotation gencodeFuncotation = (GencodeFuncotation)funcotations.get(0);
        Assert.assertEquals(gencodeFuncotation.getVariantClassification(), GencodeFuncotation.VariantClassification.IGR);
        Assert.assertNull(gencodeFuncotation.getHugoSymbol());
    }
}