htsjdk.tribble.index.IndexFactory Java Examples

The following examples show how to use htsjdk.tribble.index.IndexFactory. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IndexUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Load a Tribble .idx index from disk, checking for out of date indexes and old versions
 * (the check is delegated to {@code checkIndexVersionAndModificationTime}).
 *
 * <p>Loading is best-effort: when the index file cannot be read, or when loading/checking it
 * throws a {@link RuntimeException}, this method returns null instead of propagating the
 * failure. The swallowed exception is logged at debug level so the reason remains discoverable.
 *
 * @param featureFile the feature file whose companion index should be loaded; validated with
 *                    {@code Utils.nonNull}
 * @return an Index, or null if we're unable to load
 */
public static Index loadTribbleIndex(final File featureFile) {
    Utils.nonNull(featureFile);
    final File indexFile = Tribble.indexFile(featureFile);
    if (! indexFile.canRead()) {
        return null;
    }
    logger.debug("Loading Tribble index from disk for file " + featureFile);
    try {
        final Index index = IndexFactory.loadIndex(indexFile.getAbsolutePath());
        checkIndexVersionAndModificationTime(featureFile, indexFile, index);
        return index;
    } catch (final RuntimeException e){
        // Deliberately non-fatal: callers treat null as "no usable index". Record the cause
        // rather than discarding it, so a corrupt or unreadable index can be diagnosed.
        logger.debug("Unable to load Tribble index " + indexFile + " for file " + featureFile, e);
        return null;
    }
}
 
Example #2
Source File: IndexUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Try to load the tabix index from disk, checking for out of date indexes and old versions
 * (the check is delegated to {@code checkIndexVersionAndModificationTime}).
 *
 * <p>Loading is best-effort: when the feature file is not reported as tabix-indexed, or when
 * any step throws an {@link IOException} or {@link RuntimeException}, this method returns null
 * instead of propagating the failure. The swallowed exception is logged at debug level so the
 * reason remains discoverable.
 *
 * @param featureFile the feature file whose tabix index should be loaded; validated with
 *                    {@code Utils.nonNull}
 * @return an Index, or null if we're unable to load
 */
public static Index loadTabixIndex(final File featureFile) {
    Utils.nonNull(featureFile);
    try {
        final String path = featureFile.getAbsolutePath();
        final boolean isTabix = new AbstractFeatureReader.ComponentMethods().isTabix(path, null);
        if (! isTabix){
            return null;
        }
        // The index path is the feature file path with the tabix index extension appended.
        final String indexPath = ParsingUtils.appendToPath(path, FileExtensions.TABIX_INDEX);
        logger.debug("Loading tabix index from disk for file " + featureFile);
        final Index index = IndexFactory.loadIndex(indexPath);
        final File indexFile = new File(indexPath);
        checkIndexVersionAndModificationTime(featureFile, indexFile, index);
        return index;
    } catch (final IOException | RuntimeException e) {
        // Deliberately non-fatal: callers treat null as "no usable index". Record the cause
        // rather than discarding it, so a corrupt or unreadable index can be diagnosed.
        logger.debug("Unable to load tabix index for file " + featureFile, e);
        return null;
    }
}
 
Example #3
Source File: IndexFeatureFileIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Indexes a plain-text VCF into an explicitly named output file, then checks that the tool
 * reports that output path and that the written file loads as a LinearIndex for contigs 1-4.
 */
@Test
public void testVCFIndex() {
    final File inputVcf = getTestFile("test_variants_for_index.vcf");
    final File requestedIndex = createTempFile("test_variants_for_index.vcf", ".idx");

    final Object toolResult = this.runCommandLine(new String[] {
            "-I", inputVcf.getAbsolutePath(),
            "-O", requestedIndex.getAbsolutePath()
    });
    // The value returned by the tool must be the path of the requested index file.
    Assert.assertEquals(toolResult, requestedIndex.getAbsolutePath());

    final String writtenIndexPath = toolResult.toString();
    final Index loaded = IndexFactory.loadIndex(writtenIndexPath);
    Assert.assertTrue(loaded instanceof LinearIndex);

    Assert.assertEquals(loaded.getSequenceNames(), Arrays.asList("1", "2", "3", "4"));
    checkIndex(loaded, Arrays.asList("1", "2", "3", "4"));
}
 
Example #4
Source File: IndexFeatureFileIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Copies the test VCF to a temporary bucket location, runs IndexFeatureFile on it without an
 * explicit output, and checks that a ".idx" file appears next to it and loads as a LinearIndex
 * for contigs 1-4.
 */
@Test(groups={"bucket"})
public void testVCFIndexOnCloud() throws IOException {
    final File localVcf = getTestFile("test_variants_for_index.vcf");
    final String vcfOnGCS = BucketUtils.getTempFilePath(
            getGCPTestStaging() +"testIndexOnCloud", ".vcf");
    BucketUtils.copyFile(localVcf.getAbsolutePath(), vcfOnGCS);

    // Run through Main so the tool name is part of the argument list.
    new Main().instanceMain(new String[] { "IndexFeatureFile", "-I", vcfOnGCS });

    // No output was requested, so the index is expected beside the uploaded VCF.
    final String expectedIndexLocation = vcfOnGCS + ".idx";
    Assert.assertTrue(BucketUtils.fileExists(expectedIndexLocation));

    final Index loaded = IndexFactory.loadIndex(expectedIndexLocation);
    Assert.assertTrue(loaded instanceof LinearIndex);
    Assert.assertEquals(loaded.getSequenceNames(), Arrays.asList("1", "2", "3", "4"));
    checkIndex(loaded, Arrays.asList("1", "2", "3", "4"));
}
 
Example #5
Source File: IndexFeatureFileIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Indexes a plain-text VCF without an explicit output argument and checks that the tool
 * reports the path given by {@code Tribble.indexPath} for the input, and that the file at
 * that path loads as a LinearIndex for contigs 1-4.
 */
@Test
public void testVCFIndex_inferredName() {
    final File ORIG_FILE = getTestFile("test_variants_for_index.vcf");

    // The inferred index location depends only on the input, so compute it and register the
    // cleanup BEFORE running the tool. Otherwise a tool failure or a failed assertion below
    // would leave the generated index behind next to the test resource.
    final Path tribbleIndex = Tribble.indexPath(ORIG_FILE.toPath());
    tribbleIndex.toFile().deleteOnExit();

    final String[] args = {
            "-I" ,  ORIG_FILE.getAbsolutePath(),
    };
    final Object res = this.runCommandLine(args);
    Assert.assertEquals(res, tribbleIndex.toAbsolutePath().toString());

    final Index index = IndexFactory.loadIndex(res.toString());
    Assert.assertTrue(index instanceof LinearIndex);
    Assert.assertEquals(index.getSequenceNames(), Arrays.asList("1", "2", "3", "4"));
    checkIndex(index, Arrays.asList("1", "2", "3", "4"));
}
 
Example #6
Source File: IndexFeatureFileIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Indexes a block-gzipped VCF into an explicitly named output carrying the tabix extension,
 * then checks that the tool reports that path and that the file loads as a TabixIndex for
 * contigs 1-4.
 */
@Test
public void testVCFGZIndex_tabix() {
    final File bgzippedVcf = getTestFile("test_variants_for_index.vcf.blockgz.gz"); //made by bgzip
    final File requestedIndex = createTempFile("test_variants_for_index.blockgz.gz.",
        FileExtensions.TABIX_INDEX);

    final Object toolResult = this.runCommandLine(new String[] {
            "-I", bgzippedVcf.getAbsolutePath(),
            "-O", requestedIndex.getAbsolutePath()
    });
    // The value returned by the tool must be the path of the requested index file.
    Assert.assertEquals(toolResult, requestedIndex.getAbsolutePath());

    final String writtenIndexPath = toolResult.toString();
    final Index loaded = IndexFactory.loadIndex(writtenIndexPath);
    Assert.assertTrue(loaded instanceof TabixIndex);

    Assert.assertEquals(loaded.getSequenceNames(), Arrays.asList("1", "2", "3", "4"));
    checkIndex(loaded, Arrays.asList("1", "2", "3", "4"));
}
 
Example #7
Source File: IndexFeatureFileIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Indexes a block-gzipped VCF without an explicit output argument and checks that the tool
 * reports the input path with the tabix extension appended, that the file exists, and that
 * it loads as a TabixIndex for contigs 1-4.
 */
@Test
public void testVCFGZIndex_inferredName(){
    final File ORIG_FILE = getTestFile("test_variants_for_index.vcf.blockgz.gz"); //made by bgzip

    // The inferred index location depends only on the input, so compute it and register the
    // cleanup BEFORE running the tool. Otherwise a tool failure or a failed assertion below
    // would leave the generated index behind next to the test resource.
    final File tabixIndex = new File(ORIG_FILE.getAbsolutePath() + FileExtensions.TABIX_INDEX);
    tabixIndex.deleteOnExit();

    final String[] args = {
            "-I" ,  ORIG_FILE.getAbsolutePath(),
    };
    final Object res = this.runCommandLine(args);
    Assert.assertEquals(res, tabixIndex.getAbsolutePath());

    Assert.assertTrue(tabixIndex.exists(), tabixIndex + " does not exists");
    final Index index = IndexFactory.loadIndex(tabixIndex.toString());
    Assert.assertTrue(index instanceof TabixIndex);

    Assert.assertEquals(index.getSequenceNames(), Arrays.asList("1", "2", "3", "4"));
    checkIndex(index, Arrays.asList("1", "2", "3", "4"));
}
 
Example #8
Source File: IndexFeatureFileIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Indexes a BCF file into an explicitly named output, then checks that the tool reports that
 * path and that the written file loads as a LinearIndex whose only contig is "1".
 */
@Test
public void testBCFIndex() {
    final File inputBcf = getTestFile("test_variants_for_index.bcf");
    final File requestedIndex = createTempFile("test_variants_for_index.bcf.", ".idx");

    final Object toolResult = this.runCommandLine(new String[] {
            "-I", inputBcf.getAbsolutePath(),
            "-O", requestedIndex.getAbsolutePath()
    });
    // The value returned by the tool must be the path of the requested index file.
    Assert.assertEquals(toolResult, requestedIndex.getAbsolutePath());

    final String writtenIndexPath = toolResult.toString();
    final Index loaded = IndexFactory.loadIndex(writtenIndexPath);
    Assert.assertTrue(loaded instanceof LinearIndex);
    Assert.assertEquals(loaded.getSequenceNames(), Arrays.asList("1"));
    checkIndex(loaded, Arrays.asList("1"));
}
 
Example #9
Source File: IndexFeatureFileIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Covers a GVCF whose file name lacks the .g.vcf extension, so the tool handles it like a
 * regular VCF. Checks that the tool reports the requested output path and that the written
 * file loads as a LinearIndex whose only contig is "1".
 */
@Test
public void testGVCFTreatedAsVCFIndex() {
    // The input is a GVCF by content but not by name: it ends in plain ".vcf".
    final File gvcfNamedAsVcf = getTestFile("test_variants_for_index.gvcf_treated_as_vcf.vcf");
    final File requestedIndex = createTempFile("test_variants_for_index.gvcf_treated_as_vcf.vcf.", ".idx");

    final Object toolResult = this.runCommandLine(new String[] {
            "-I", gvcfNamedAsVcf.getAbsolutePath(),
            "-O", requestedIndex.getAbsolutePath()
    });
    // The value returned by the tool must be the path of the requested index file.
    Assert.assertEquals(toolResult, requestedIndex.getAbsolutePath());

    final String writtenIndexPath = toolResult.toString();
    final Index loaded = IndexFactory.loadIndex(writtenIndexPath);
    Assert.assertTrue(loaded instanceof LinearIndex);
    Assert.assertEquals(loaded.getSequenceNames(), Arrays.asList("1"));
    checkIndex(loaded, Arrays.asList("1"));
}
 
Example #10
Source File: IndexFeatureFileIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Indexes a file named with the .g.vcf extension into an explicitly named output, then checks
 * that the tool reports that path and that the written file loads as a LinearIndex whose only
 * contig is "1".
 */
@Test
public void testGVCFIndex() {
    final File inputGvcf = getTestFile("test_variants_for_index.g.vcf");
    final File requestedIndex = createTempFile("test_variants_for_index.g.vcf.", ".idx");

    final Object toolResult = this.runCommandLine(new String[] {
            "-I", inputGvcf.getAbsolutePath(),
            "-O", requestedIndex.getAbsolutePath()
    });
    // The value returned by the tool must be the path of the requested index file.
    Assert.assertEquals(toolResult, requestedIndex.getAbsolutePath());

    final String writtenIndexPath = toolResult.toString();
    final Index loaded = IndexFactory.loadIndex(writtenIndexPath);
    Assert.assertTrue(loaded instanceof LinearIndex);
    Assert.assertEquals(loaded.getSequenceNames(), Arrays.asList("1"));
    checkIndex(loaded, Arrays.asList("1"));
}
 
Example #11
Source File: IndexFeatureFileIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Indexes the Ensembl GTF test file into an explicitly named output, then checks that the
 * tool reports that path and that the written file loads as a LinearIndex whose only
 * sequence is "Chromosome".
 */
@Test
public void testEnsemblGtfIndex() {
    final File requestedIndex = createTempFile("Escherichia_coli_str_k_12_substr_mg1655.ASM584v2.44.gtf.", ".idx");

    final Object toolResult = this.runCommandLine(new String[] {
            "-I", ENSEMBL_GTF_TEST_FILE.getAbsolutePath(),
            "-O", requestedIndex.getAbsolutePath()
    });
    // The value returned by the tool must be the path of the requested index file.
    Assert.assertEquals(toolResult, requestedIndex.getAbsolutePath());

    final String writtenIndexPath = toolResult.toString();
    final Index loaded = IndexFactory.loadIndex(writtenIndexPath);
    Assert.assertTrue(loaded instanceof LinearIndex);
    Assert.assertEquals(loaded.getSequenceNames(), Collections.singletonList("Chromosome"));
    checkIndex(loaded, Collections.singletonList("Chromosome"));
}
 
Example #12
Source File: IndexFeatureFile.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds an in-memory index for the feature file, picking the index flavour from the file name:
 * a tabix index for block-compressed extensions, a linear index with
 * {@code OPTIMAL_GVCF_INDEX_BIN_SIZE} for names ending in {@code GVCF_FILE_EXTENSION}, and a
 * dynamic index balanced for seek time otherwise.
 *
 * @param codec the codec handed to IndexFactory when creating the index
 * @return the newly created index
 * @throws UserException if a block-compressed input is paired with an output path that does not
 *         end in the tabix extension, or (as CouldNotIndexFile) if index creation throws a
 *         TribbleException
 */
private Index createAppropriateIndexInMemory(final FeatureCodec<? extends Feature, ?> codec) {
    try {
        // For block-compression files, write a Tabix index
        if (IOUtil.hasBlockCompressedExtension(featurePath.toPath())) {
            // Creating tabix indices with a non standard extensions can cause problems so we disable it
            final boolean outputLacksTabixExtension =
                    outputPath != null && !outputPath.getURIString().endsWith(FileExtensions.TABIX_INDEX);
            if (outputLacksTabixExtension) {
                throw new UserException("The index for " + featurePath + " must be written to a file with a \"" + FileExtensions.TABIX_INDEX + "\" extension");
            }

            // TODO: this could benefit from provided sequence dictionary from reference
            // TODO: this can be an optional parameter for the tool
            return IndexFactory.createIndex(featurePath.toPath(), codec, IndexFactory.IndexType.TABIX, null);
        }

        // TODO: detection of GVCF files should not be file-extension-based. Need to come up with canonical
        // TODO: way of detecting GVCFs based on the contents (may require changes to the spec!)
        if (featurePath.getURIString().endsWith(GVCF_FILE_EXTENSION)) {
            // Optimize GVCF indices for the use case of having a large number of GVCFs open simultaneously
            return IndexFactory.createLinearIndex(featurePath.toPath(), codec, OPTIMAL_GVCF_INDEX_BIN_SIZE);
        }

        // Optimize indices for other kinds of files for seek time / querying
        return IndexFactory.createDynamicIndex(featurePath.toPath(), codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
    } catch (TribbleException indexingFailure) {
        // Underlying cause here is usually a malformed file, but can also be things like
        // "codec does not support tabix"
        throw new UserException.CouldNotIndexFile(featurePath.toPath(), indexingFailure);
    }
}
 
Example #13
Source File: IndexUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Loads the checked-in index for the test VCF and runs the version / modification-time check
 * against it; the test passes as long as that check does not throw.
 */
@Test
public void testCheckIndexModificationTime() throws Exception {
    final File featureFile = new File(getToolTestDataDir(), "test_variants_for_index.vcf");
    final File indexFile = new File(getToolTestDataDir(), "test_variants_for_index.vcf.idx");

    final String indexPath = indexFile.getAbsolutePath();
    final Index loadedIndex = IndexFactory.loadIndex(indexPath);

    // No assertion needed: an exception from the check is the failure signal.
    IndexUtils.checkIndexVersionAndModificationTime(featureFile, indexFile, loadedIndex);
}