Java Code Examples for htsjdk.tribble.AbstractFeatureReader#getFeatureReader()

The following examples show how to use htsjdk.tribble.AbstractFeatureReader#getFeatureReader(). They are drawn from open-source projects; the source file, project, and license are noted above each example.
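Before the project-specific examples, here is a minimal, self-contained sketch of the pattern most of them share: open a reader from a path, a codec, and a requireIndex flag, iterate over the features, and close the reader. The file name is hypothetical; the calls are the htsjdk API used throughout the examples below.

import htsjdk.tribble.AbstractFeatureReader;
import htsjdk.tribble.readers.LineIterator;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCodec;

import java.io.IOException;

public class FeatureReaderSketch {
    public static void main(final String[] args) throws IOException {
        // Hypothetical input; requireIndex = false means no .tbi/.idx file is
        // needed unless a query by interval is attempted later.
        final String vcfPath = "example.vcf";
        try (final AbstractFeatureReader<VariantContext, LineIterator> reader =
                     AbstractFeatureReader.getFeatureReader(vcfPath, new VCFCodec(), false)) {
            // iterator() returns a CloseableTribbleIterator, which is also Iterable.
            for (final VariantContext variant : reader.iterator()) {
                System.out.println(variant.getContig() + ":" + variant.getStart());
            }
        }
    }
}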
Example 1
Source File: VariantHotspotFile.java    From hmftools with GNU General Public License v3.0
@NotNull
public static ListMultimap<Chromosome, VariantHotspot> readFromVCF(@NotNull final String fileName) throws IOException {
    ListMultimap<Chromosome, VariantHotspot> result = ArrayListMultimap.create();

    try (final AbstractFeatureReader<VariantContext, LineIterator> reader = AbstractFeatureReader.getFeatureReader(fileName,
            new VCFCodec(),
            false)) {
        for (VariantContext variantContext : reader.iterator()) {
            if (HumanChromosome.contains(variantContext.getContig())) {
                result.put(HumanChromosome.fromString(variantContext.getContig()), fromVariantContext(variantContext));
            }
        }
    }

    return result;
}
 
Example 2
Source File: GenomicsDBImportIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testPreserveContigOrderingInHeader() throws IOException {
    final String workspace = createTempDir("testPreserveContigOrderingInHeader-").getAbsolutePath() + "/workspace";
    final ArrayList<SimpleInterval> intervals = new ArrayList<>(Arrays.asList(new SimpleInterval("chr20", 17959479, 17959479)));
    writeToGenomicsDB(Arrays.asList(GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf",
            GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting2.g.vcf"), intervals, workspace, 0, false, 0, 1);

    try ( final FeatureReader<VariantContext> genomicsDBFeatureReader =
                  getGenomicsDBFeatureReader(workspace, b38_reference_20_21);

         final AbstractFeatureReader<VariantContext, LineIterator> inputGVCFReader =
                  AbstractFeatureReader.getFeatureReader(GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf", new VCFCodec(), true);
    ) {
        final SAMSequenceDictionary dictionaryFromGenomicsDB = ((VCFHeader)genomicsDBFeatureReader.getHeader()).getSequenceDictionary();
        final SAMSequenceDictionary dictionaryFromInputGVCF =  ((VCFHeader)inputGVCFReader.getHeader()).getSequenceDictionary();

        Assert.assertEquals(dictionaryFromGenomicsDB, dictionaryFromInputGVCF, "Sequence dictionary from GenomicsDB does not match original sequence dictionary from input GVCF");
    }

}
 
Example 3
Source File: FeatureDataSource.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static <T extends Feature> AbstractFeatureReader<T, ?> getTribbleFeatureReader(final FeatureInput<T> featureInput, final FeatureCodec<T, ?> codec, final Function<SeekableByteChannel, SeekableByteChannel> cloudWrapper, final Function<SeekableByteChannel, SeekableByteChannel> cloudIndexWrapper) {
    Utils.nonNull(codec);
    try {
        // Must get the path to the data file from the codec here:
        final String absoluteRawPath = featureInput.getRawInputString();

        // Instruct the reader factory to not require an index. We will require one ourselves as soon as
        // a query by interval is attempted.
        final boolean requireIndex = false;

        // Only apply the wrappers if the feature input is in a remote location which will benefit from prefetching.
        if (BucketUtils.isEligibleForPrefetching(featureInput)) {
            return AbstractFeatureReader.getFeatureReader(absoluteRawPath, null, codec, requireIndex, cloudWrapper, cloudIndexWrapper);
        } else {
            return AbstractFeatureReader.getFeatureReader(absoluteRawPath, null, codec, requireIndex, Utils.identityFunction(), Utils.identityFunction());
        }
    } catch (final TribbleException e) {
        throw new GATKException("Error initializing feature reader for path " + featureInput.getFeaturePath(), e);
    }
}
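For reference, the six-argument overload used in this example takes, in order: the resource path, an optional index path (null lets htsjdk derive it from the resource path), the codec, the requireIndex flag, and two functions that wrap the underlying SeekableByteChannels for the data and the index, respectively. Pass-through wrappers (Function.identity() or GATK's Utils.identityFunction()) leave the channels untouched, while a prefetching wrapper, as in the GatherVcfsCloud example below, buffers remote reads.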
 
Example 4
Source File: FixCallSetSampleOrderingIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test(dataProvider = "getBadlySortedFiles")
public void testUnshufflingRestoresCorrectSamples(final File sampleMap, final File shuffled, final int batchSize, final int readerThreads, final File gvcfToHeaderSampleMapFile) throws IOException {
    final ArgumentsBuilder args = new ArgumentsBuilder();
    final File fixed = createTempFile("fixed_", ".vcf");
    args.addVCF(shuffled)
            .add(GenomicsDBImport.SAMPLE_NAME_MAP_LONG_NAME, sampleMap)
            .add(GenomicsDBImport.VCF_INITIALIZER_THREADS_LONG_NAME, String.valueOf(readerThreads))
            .add(GenomicsDBImport.BATCHSIZE_ARG_LONG_NAME, String.valueOf(batchSize))
            .add(FixCallSetSampleOrdering.SKIP_PROMPT_LONG_NAME, true)
            .addOutput(fixed);
    if ( gvcfToHeaderSampleMapFile != null ) {
        args.add("gvcf-to-header-sample-map-file", gvcfToHeaderSampleMapFile);
    }
    
    runCommandLine(args);

    try(final FeatureReader<VariantContext> reader =
            AbstractFeatureReader.getFeatureReader(fixed.getAbsolutePath(), new VCFCodec(), true)){
        final VariantContext vc = reader.iterator().next();
        Assert.assertEquals(vc.getGenotypes().size(), 1000);
        vc.getGenotypes().iterator().forEachRemaining( g ->
                Assert.assertEquals(g.getSampleName(), g.getAnyAttribute(SAMPLE_NAME_KEY))
        );
    }
}
 
Example 5
Source File: GatherVcfsCloudIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@SuppressWarnings({"unchecked"})
private void assertGatherProducesCorrectVariants(GatherVcfsCloud.GatherType gatherType, File expected, List<File> inputs) throws IOException {
    final File output = createTempFile("gathered", ".vcf.gz");
    final ArgumentsBuilder args = new ArgumentsBuilder();
    inputs.forEach(args::addInput);
    args.addOutput(output)
            .add(GatherVcfsCloud.GATHER_TYPE_LONG_NAME, gatherType.toString());
    runCommandLine(args);

    try (final AbstractFeatureReader<VariantContext, ?> actualReader = AbstractFeatureReader.getFeatureReader(output.getAbsolutePath(), null, new VCFCodec(), false, Function.identity(), Function.identity());
         final AbstractFeatureReader<VariantContext, ?> expectedReader = AbstractFeatureReader.getFeatureReader(expected.getAbsolutePath(), null, new VCFCodec(), false, Function.identity(), Function.identity())) {

        final List<VariantContext> actualVariants = StreamSupport.stream(Spliterators.spliteratorUnknownSize(actualReader.iterator(), Spliterator.ORDERED), false).collect(Collectors.toList());
        final List<VariantContext> expectedVariants = StreamSupport.stream(Spliterators.spliteratorUnknownSize(expectedReader.iterator(), Spliterator.ORDERED), false).collect(Collectors.toList());
        VariantContextTestUtils.assertEqualVariants(actualVariants, expectedVariants);

        Assert.assertEquals(((VCFHeader) actualReader.getHeader()).getMetaDataInInputOrder(),
                ((VCFHeader) expectedReader.getHeader()).getMetaDataInInputOrder());
    }
}
 
Example 6
Source File: RevertSamSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
@SuppressWarnings("unchecked")
static List<String> validateOutputParamsByReadGroup(final String output, final String outputMap) throws IOException {
    final List<String> errors = new ArrayList<>();
    if (output != null) {
        if (!Files.isDirectory(IOUtil.getPath(output))) {
            errors.add("When '--output-by-readgroup' is set and output is provided, it must be a directory: " + output);
        }
        return errors;
    }
    // output is null if we reached here
    if (outputMap == null) {
        errors.add("Must provide either output or outputMap when '--output-by-readgroup' is set.");
        return errors;
    }
    if (!Files.isReadable(IOUtil.getPath(outputMap))) {
        errors.add("Cannot read outputMap " + outputMap);
        return errors;
    }
    // Use try-with-resources so the header check does not leak the reader.
    try (final FeatureReader<TableFeature> parser = AbstractFeatureReader.getFeatureReader(outputMap, new TableCodec(null), false)) {
        if (!isOutputMapHeaderValid((List<String>) parser.getHeader())) {
            errors.add("Invalid header: " + outputMap + ". Must be a tab-separated file with '" + OUTPUT_MAP_READ_GROUP_FIELD_NAME + "' as first column and output as second column.");
        }
    }
    return errors;
}
 
Example 7
Source File: LongISLNDReadAlignmentMap.java    From varsim with BSD 2-Clause "Simplified" License
public LongISLNDReadAlignmentMap(final Collection<String> readAlignmentMapFiles) throws IOException {
    for (final String readAlignmentMapFileName : readAlignmentMapFiles) {
        log.info("Reading in read map from " + readAlignmentMapFileName);
        try (final AbstractFeatureReader<BEDFeature, LineIterator> featureReader =
                     AbstractFeatureReader.getFeatureReader(readAlignmentMapFileName, new BEDCodec(), false)) {
            for (final BEDFeature feature : featureReader.iterator()) {
                readAlignmentMap.put(feature.getName(), new LongISLNDReadMapRecord(feature).toReadMapRecord());
            }
        }
    }
}
 
Example 8
Source File: FuncotationMapUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test(expectedExceptions = GATKException.ShouldNeverReachHereException.class, expectedExceptionsMessageRegExp = ".*a Gencode Funcotation cannot be added.*")
public void testAddingGencodeFuncotationToFuncotationMap() {

    try (final FeatureReader<GencodeGtfFeature> gencodeFeatureReader = AbstractFeatureReader.getFeatureReader( FuncotatorTestConstants.GENCODE_DATA_SOURCE_GTF_PATH_HG19, new GencodeGtfCodec() ) ) {

        // Create some gencode funcotations.  The content does not really matter here.
        final List<Funcotation> gencodeFuncotations = createGencodeFuncotations("chr19", 8994200, 8994200, "G", "C", FuncotatorReferenceTestUtils.retrieveHg19Chr19Ref(),
                ReferenceDataSource.of(IOUtils.getPath(FuncotatorReferenceTestUtils.retrieveHg19Chr19Ref())),
                gencodeFeatureReader, FuncotatorTestConstants.GENCODE_DATA_SOURCE_FASTA_PATH_HG19, FuncotatorTestConstants.GENCODE_DATA_SOURCE_GTF_PATH_HG19,
                TranscriptSelectionMode.ALL).stream().map(gf -> (Funcotation) gf).collect(Collectors.toList());

        // Create a funcotationMap with some pre-made funcotations.  Content does not really matter.
        final FuncotationMap funcotationMap = FuncotationMap.createNoTranscriptInfo(Arrays.asList(TableFuncotation.create(
                Arrays.asList("TESTFIELD1", "TESTADD1"),
                createFieldValuesFromNameList("E", Arrays.asList("TESTFIELD1", "TESTADD1")),
                Allele.create("A"),
                "TestDataSource5", null
                ),
                TableFuncotation.create(
                        Arrays.asList("TESTFIELD2", "TESTADD2"),
                        createFieldValuesFromNameList("F", Arrays.asList("TESTFIELD2", "TESTADD2")),
                        Allele.create("AG"),
                        "TestDataSource5", null
                ),
                TableFuncotation.create(
                        Arrays.asList("TESTFIELD3", "TESTADD3"),
                        createFieldValuesFromNameList("G", Arrays.asList("TESTFIELD3", "TESTADD3")),
                        Allele.create("AT"),
                        "TestDataSource5", null
                )));

        // Attempt to add the Gencode funcotations to the funcotation map.  This should cause an exception.
        funcotationMap.add(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY, gencodeFuncotations);
    }
    catch (final IOException ex) {
        throw new GATKException("Could not close gencodeFeatureReader!", ex);
    }
}
 
Example 9
Source File: FuncotationMapUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test(dataProvider = "provideGencodeFuncotationCreation")
public void testGencodeFuncotationCreation(final String contig,
                                           final int start,
                                           final int end,
                                           final String ref,
                                           final String alt,
                                           final String referenceFileName,
                                           final ReferenceDataSource referenceDataSource,
                                           final String transcriptFastaFile,
                                           final String transcriptGtfFile,
                                           final TranscriptSelectionMode transcriptSelectionMode,
                                           final List<String> gtTranscripts) {

    try (final FeatureReader<GencodeGtfFeature> gencodeFeatureReader = AbstractFeatureReader.getFeatureReader( FuncotatorTestConstants.GENCODE_DATA_SOURCE_GTF_PATH_HG19, new GencodeGtfCodec() ) ) {
        final List<GencodeFuncotation> gencodeFuncotations = createGencodeFuncotations(contig, start, end, ref, alt, referenceFileName, referenceDataSource, gencodeFeatureReader, transcriptFastaFile, transcriptGtfFile, transcriptSelectionMode);

        final FuncotationMap funcotationMap = FuncotationMap.createFromGencodeFuncotations(gencodeFuncotations);

        Assert.assertEquals(funcotationMap.getTranscriptList(), gtTranscripts);
        Assert.assertTrue(funcotationMap.getTranscriptList().stream().allMatch(k -> funcotationMap.get(k).size() == 1));
        Assert.assertTrue(funcotationMap.getTranscriptList().stream()
                .noneMatch(k -> ((GencodeFuncotation) funcotationMap.get(k).get(0)).getVariantClassification().equals(GencodeFuncotation.VariantClassification.IGR)));
        Assert.assertTrue(funcotationMap.getTranscriptList().stream()
                .noneMatch(k -> ((GencodeFuncotation) funcotationMap.get(k).get(0)).getVariantClassification().equals(GencodeFuncotation.VariantClassification.COULD_NOT_DETERMINE)));
    }
    catch ( final IOException ex ) {
        throw new GATKException("Could not close gencodeFeatureReader!", ex);
    }
}
 
Example 10
Source File: GATKVariantContextUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testOnTheFlyTabixCreation() throws IOException {
    final File inputGZIPFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/8_mutect2_sorted.vcf.gz");
    final File outputGZIPFile = createTempFile("testOnTheFlyTabixCreation", ".vcf.gz");

    long recordCount = 0;
    try (final VariantContextWriter vcfWriter = GATKVariantContextUtils.createVCFWriter(
             outputGZIPFile.toPath(),
             null,
             false,
             Options.INDEX_ON_THE_FLY);
        final FeatureReader<VariantContext> inputFileReader =
                 AbstractFeatureReader.getFeatureReader(inputGZIPFile.getAbsolutePath(), new VCFCodec(), false))
    {
        vcfWriter.writeHeader((VCFHeader)inputFileReader.getHeader());
        final Iterator<VariantContext> it = inputFileReader.iterator();
        while (it.hasNext()) {
            vcfWriter.add(it.next());
            recordCount++;
        }
    }

    // make sure we got a tabix index
    final File tabixIndexFile = new File(outputGZIPFile.getAbsolutePath() + FileExtensions.TABIX_INDEX);
    Assert.assertTrue(tabixIndexFile.exists());
    Assert.assertTrue(tabixIndexFile.length() > 0);

    // verify the index via query
    final SimpleInterval queryInterval = new SimpleInterval("chr6:33414233-118314029");
    try (final FeatureReader<VariantContext> outputFileReader = AbstractFeatureReader.getFeatureReader(
            outputGZIPFile.getAbsolutePath(),
            new VCFCodec(),
            true)) // require index
    {
        final long actualCount = outputFileReader.query(
                queryInterval.getContig(), queryInterval.getStart(), queryInterval.getEnd()).stream().count();
        Assert.assertEquals(actualCount, recordCount);
    }
}
 
Example 11
Source File: RevertSamSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static Map<String, Path> createOutputMapFromFile(final String outputMapFile) throws IOException {
    final Map<String, Path> outputMap = new HashMap<>();
    try (final FeatureReader<TableFeature> parser = AbstractFeatureReader.getFeatureReader(outputMapFile, new TableCodec(null), false)) {
        for (final TableFeature row : parser.iterator()) {
            final String id = row.get(OUTPUT_MAP_READ_GROUP_FIELD_NAME);
            final String output = row.get(OUTPUT_MAP_OUTPUT_FILE_FIELD_NAME);
            final Path outputPath = IOUtils.getPath(output);
            outputMap.put(id, outputPath);
        }
    }
    return outputMap;
}
 
Example 12
Source File: AnnotatedIntervalCollection.java    From gatk with BSD 3-Clause "New" or "Revised" License
/** Create a collection based on the contents of an input file and a given config file.  The config file must be the same as
 * is ingested by {@link XsvLocatableTableCodec}.
 *
 * @param input path to the xsv file.  Must be readable.  Never {@code null}.
 * @param inputConfigFile config file specifying the format of the xsv file.  Must be readable.  Never {@code null}.
 * @param headersOfInterest Only preserve these headers.  If any of these are missing from the input file, only a
 *                          warning will be logged.  This parameter should not include the locatable columns
 *                          defined by the config file, which are always preserved.
 *                          Use {@code null} to indicate "all headers are of interest".
 * @return never {@code null}
 */
public static AnnotatedIntervalCollection create(final Path input, final Path inputConfigFile, final Set<String> headersOfInterest) {

    IOUtils.assertFileIsReadable(input);
    IOUtils.assertFileIsReadable(inputConfigFile);

    final AnnotatedIntervalCodec codec = new AnnotatedIntervalCodec(inputConfigFile);
    final List<AnnotatedInterval> regions = new ArrayList<>();
    
    if (codec.canDecode(input.toUri().toString())) {
        try (final FeatureReader<AnnotatedInterval> reader = AbstractFeatureReader.getFeatureReader(input.toUri().toString(), codec, false)){

            // This cast is an artifact of the tribble framework.
            @SuppressWarnings("unchecked")
            final AnnotatedIntervalHeader header = (AnnotatedIntervalHeader) reader.getHeader();

            final List<String> finalAnnotations = determineCollectionAnnotations(headersOfInterest, header.getAnnotations());

            final CloseableTribbleIterator<AnnotatedInterval> it = reader.iterator();
            StreamSupport.stream(it.spliterator(), false)
                    .filter(r -> r != null)
                    .map(r -> copyAnnotatedInterval(r,  new HashSet<>(finalAnnotations)))
                    .forEach(r -> regions.add(r));

            return new AnnotatedIntervalCollection(header.getSamFileHeader(), finalAnnotations, regions);

        } catch ( final IOException ex ) {
            throw new GATKException("Error - IO problem with file " + input, ex);
        }
    } else {
        throw new UserException.BadInput("Could not parse xsv file: " + input.toUri().toString());
    }
}
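A hypothetical invocation of the method above; the file names are placeholders, and per the javadoc a null headersOfInterest keeps all headers:

import java.nio.file.Paths;

// Hypothetical file names; null means "all headers are of interest".
final AnnotatedIntervalCollection collection = AnnotatedIntervalCollection.create(
        Paths.get("intervals.seg"), Paths.get("intervals_config.properties"), null);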
 
Example 13
Source File: MakeVcfSampleNameMap.java    From picard with MIT License
private static AbstractFeatureReader<VariantContext, LineIterator> getReaderFromPath(final Path variantPath) {
    final String variantURI = variantPath.toAbsolutePath().toUri().toString();
    try {
        return AbstractFeatureReader.getFeatureReader(variantURI, null, new VCFCodec(),
                false, Function.identity(), Function.identity());
    } catch (final TribbleException e) {
        throw new PicardException("Failed to create reader from " + variantURI, e);
    }
}
 
Example 14
Source File: GermlineVcfReader.java    From hmftools with GNU General Public License v3.0
private void processVcf(final String vcfFile)
{
    try
    {
        LOGGER.info("processing germline VCF({})", vcfFile);

        mSampleGermlineSVs.clear();
        mSvFactory = new StructuralVariantFactory(new AlwaysPassFilter());

        // Close the reader when done so the file handle is not leaked.
        try (final AbstractFeatureReader<VariantContext, LineIterator> reader = AbstractFeatureReader.getFeatureReader(
                vcfFile, new VCFCodec(), false))
        {
            reader.iterator().forEach(x -> processVariant(x));
        }

        if(mSampleGermlineSVs.isEmpty())
            return;

        if (mConfig.LinkByAssembly)
            annotateAssembledLinks(mSvAssemblyData);

        if(mConfig.CheckDisruptions)
        {
            final String sampleId = mSampleGermlineSVs.get(0).SampleId;
            mGeneImpact.findDisruptiveVariants(sampleId, mSampleGermlineSVs);
        }

        writeSVs();
    }
    catch(IOException e)
    {
        LOGGER.error("error reading vcf({}): {}", vcfFile, e.toString());
    }
}
 
Example 15
Source File: StructuralVariantFileLoader.java    From hmftools with GNU General Public License v3.0
@NotNull
public static List<StructuralVariant> fromFile(@NotNull String vcfFileLocation, @NotNull VariantContextFilter filter) throws IOException {
    final StructuralVariantFactory factory = new StructuralVariantFactory(filter);

    try (final AbstractFeatureReader<VariantContext, LineIterator> reader = AbstractFeatureReader.getFeatureReader(vcfFileLocation,
            new VCFCodec(), false)) {
        reader.iterator().forEach(factory::addVariantContext);
    }

    return factory.results();
}
 
Example 16
Source File: ApplyVQSRIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testApplyRecalibrationAlleleSpecificINDELmodeGZIPIndex() throws IOException {
    final File tempGZIPOut = createTempFile("testApplyRecalibrationAlleleSpecificINDELmodeGZIP", ".vcf.gz");
    final File expectedFile = new File(getToolTestDataDir(), "expected/applyIndelAlleleSpecificResult.vcf");
    final SimpleInterval queryInterval = new SimpleInterval("3:113005755-195507036");

    // The input file is the same file as used in testApplyRecalibrationAlleleSpecificINDELmode, except that the
    // hg38 sequence dictionary has been transplanted into the header (the header itself has been modified so that
    // contig "chr3" is renamed to "3" to match the variants in this file), and the file is a .gz.
    final String base =
            " -L " +  queryInterval.toString() +
                    " -mode INDEL -AS" +
                    " -ts-filter-level 99.3" +
                    " --variant " + getToolTestDataDir() + "VQSR.AStest.postSNPinput.HACKEDhg38header.vcf.gz" +
                    " --output " + tempGZIPOut.getAbsolutePath() +
                    " --tranches-file " + getToolTestDataDir() + "VQSR.AStest.indels.tranches" +
                    " --recal-file " + getToolTestDataDir() + "VQSR.AStest.indels.recal.vcf" +
                    " --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE +" false";

    final IntegrationTestSpec spec = new IntegrationTestSpec(base, Collections.emptyList());
    spec.executeTest("testApplyRecalibrationAlleleSpecificINDELmodeGZIP", this);

    // make sure we got a tabix index
    final File tabixIndexFile = new File(tempGZIPOut.getAbsolutePath() + FileExtensions.TABIX_INDEX);
    Assert.assertTrue(tabixIndexFile.exists());
    Assert.assertTrue(tabixIndexFile.length() > 0);

    // Now verify that the resulting index is valid by querying the same interval used above to run ApplyVQSR.
    // This should return all the variants in the file. Compare that with the number of records returned by (a
    // non-query) iterator that return all variants in the expected results file.
    try (final FeatureReader<VariantContext> expectedFileReader =
                 AbstractFeatureReader.getFeatureReader(expectedFile.getAbsolutePath(), new VCFCodec(), false);
            final FeatureReader<VariantContext> outputFileReader =
                 AbstractFeatureReader.getFeatureReader(tempGZIPOut.getAbsolutePath(), new VCFCodec())) {
        // results from the query should match the expected file results
        final long actualCount = outputFileReader.query(
                queryInterval.getContig(), queryInterval.getStart(), queryInterval.getEnd()).stream().count();
        final long expectedCount = expectedFileReader.iterator().stream().count();
        Assert.assertEquals(actualCount, expectedCount);
    }
}
 
Example 17
Source File: GatherVcfsCloud.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static FeatureReader<VariantContext> getReaderFromVCFUri(final Path variantPath, final int cloudPrefetchBuffer) {
    final String variantURI = variantPath.toUri().toString();
    return AbstractFeatureReader.getFeatureReader(variantURI, null, new VCFCodec(), false,
            BucketUtils.getPrefetchingWrapper(cloudPrefetchBuffer),
            Function.identity());
}
 
Example 18
Source File: BedToIntervalList.java    From picard with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);
    try {
        // create a new header that we will assign the dictionary provided by the SAMSequenceDictionaryExtractor to.
        final SAMFileHeader header = new SAMFileHeader();
        final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath());
        header.setSequenceDictionary(samSequenceDictionary);
        // set the sort order to be sorted by coordinate, which is actually done below
        // by getting the .uniqued() intervals list before we write out the file
        header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        final IntervalList intervalList = new IntervalList(header);

        final FeatureReader<BEDFeature> bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(), false);
        final CloseableTribbleIterator<BEDFeature> iterator = bedReader.iterator();
        final ProgressLogger progressLogger = new ProgressLogger(LOG, (int) 1e6);

        while (iterator.hasNext()) {
            final BEDFeature bedFeature = iterator.next();
            final String sequenceName = bedFeature.getContig();
            final int start = bedFeature.getStart();
            final int end = bedFeature.getEnd();
            // NB: do not use an empty name within an interval
            final String name;
            if (bedFeature.getName().isEmpty()) {
                name = null;
            } else {
                name = bedFeature.getName();
            }

            final SAMSequenceRecord sequenceRecord = header.getSequenceDictionary().getSequence(sequenceName);

            // Do some validation
            if (null == sequenceRecord) {
                if (DROP_MISSING_CONTIGS) {
                    LOG.info(String.format("Dropping interval with missing contig: %s:%d-%d", sequenceName, bedFeature.getStart(), bedFeature.getEnd()));
                    missingIntervals++;
                    missingRegion += bedFeature.getEnd() - bedFeature.getStart();
                    continue;
                }
                throw new PicardException(String.format("Sequence '%s' was not found in the sequence dictionary", sequenceName));
            } else if (start < 1) {
                throw new PicardException(String.format("Start on sequence '%s' was less than one: %d", sequenceName, start));
            } else if (sequenceRecord.getSequenceLength() < start) {
                throw new PicardException(String.format("Start on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), start));
            } else if ((end == 0 && start != 1 ) //special case for 0-length interval at the start of a contig
                    || end < 0 ) {
                throw new PicardException(String.format("End on sequence '%s' was less than one: %d", sequenceName, end));
            } else if (sequenceRecord.getSequenceLength() < end) {
                throw new PicardException(String.format("End on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), end));
            } else if (end < start - 1) {
                throw new PicardException(String.format("On sequence '%s', end < start-1: %d <= %d", sequenceName, end, start));
            }

            final boolean isNegativeStrand = bedFeature.getStrand() == Strand.NEGATIVE;
            final Interval interval = new Interval(sequenceName, start, end, isNegativeStrand, name);
            intervalList.add(interval);

            progressLogger.record(sequenceName, start);
        }
        CloserUtil.close(bedReader);

        if (DROP_MISSING_CONTIGS) {
            if (missingRegion == 0) {
                LOG.info("There were no missing regions.");
            } else {
                LOG.warn(String.format("There were %d missing regions with a total of %d bases", missingIntervals, missingRegion));
            }
        }
        // Sort and write the output
        IntervalList out = intervalList;
        if (SORT) {
            out = out.sorted();
        }
        if (UNIQUE) {
            out = out.uniqued();
        }
        out.write(OUTPUT);
        LOG.info(String.format("Wrote %d intervals spanning a total of %d bases", out.getIntervals().size(),out.getBaseCount()));

    } catch (final IOException e) {
        throw new RuntimeException(e);
    }

    return 0;
}