htsjdk.samtools.SAMException Java Examples

The following examples show how to use htsjdk.samtools.SAMException. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ReferenceResource.java    From VarDictJava with MIT License 6 votes vote down vote up
/**
 * Method convert fasta data to array contains header and nucleotide bases.
 * @param fasta path to fasta file
 * @param chr chromosome name of region
 * @param start start position of region
 * @param end end position of region
 * @return array of nucleotide bases in the region of fasta
 */
public String[] retrieveSubSeq(String fasta, String chr, int start, int end) {
    try {
        IndexedFastaSequenceFile idx = fetchFasta(fasta);
        ReferenceSequence seq = idx.getSubsequenceAt(chr, start, end);
        byte[] bases = seq.getBases();
        return new String[]{">" + chr + ":" + start + "-" + end, bases != null ? new String(bases) : ""};
    } catch (SAMException e){
        if (UNABLE_FIND_CONTIG.matcher(e.getMessage()).find()){
            throw new WrongFastaOrBamException(chr, e);
        } else if (WRONG_START_OR_END.matcher(e.getMessage()).find()){
            throw new RegionBoundariesException(chr, start, end, e);
        } else {
            throw e;
        }
    }
}
 
Example #2
Source File: PicardIndexedFastaSequenceFile.java    From chipster with MIT License 6 votes vote down vote up
/**
 * Do some basic checking to make sure the dictionary and the index match.
 * @param fastaFile Used for error reporting only.
 * @param sequenceDictionary sequence dictionary to check against the index.
 * @param index index file to check against the dictionary.
 */
protected static void sanityCheckDictionaryAgainstIndex(final String fastaFile,
                                                        final SAMSequenceDictionary sequenceDictionary,
                                                        final FastaSequenceIndex index) {
    // Make sure dictionary and index are the same size.
    if( sequenceDictionary.getSequences().size() != index.size() )
        throw new SAMException("Sequence dictionary and index contain different numbers of contigs");

    Iterator<SAMSequenceRecord> sequenceIterator = sequenceDictionary.getSequences().iterator();
    Iterator<FastaSequenceIndexEntry> indexIterator = index.iterator();

    while(sequenceIterator.hasNext() && indexIterator.hasNext()) {
        SAMSequenceRecord sequenceEntry = sequenceIterator.next();
        FastaSequenceIndexEntry indexEntry = indexIterator.next();

        if(!sequenceEntry.getSequenceName().equals(indexEntry.getContig())) {
            throw new SAMException(String.format("Mismatch between sequence dictionary fasta index for %s, sequence '%s' != '%s'.",
                    fastaFile, sequenceEntry.getSequenceName(),indexEntry.getContig()));
        }

        // Make sure sequence length matches index length.
        if( sequenceEntry.getSequenceLength() != indexEntry.getSize())
            throw new SAMException("Index length does not match dictionary length for contig: " + sequenceEntry.getSequenceName() );
    }
}
 
Example #3
Source File: PicardIndexedFastaSequenceFile.java    From chipster with MIT License 6 votes vote down vote up
/**
 * Open the given indexed fasta sequence file.  Throw an exception if the file cannot be opened.
 * @param path The file to open.
 * @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk.
 */
public PicardIndexedFastaSequenceFile(final Path path, final FastaSequenceIndex index) {
    super(path);
    if (index == null) throw new IllegalArgumentException("Null index for fasta " + path);
    this.index = index;
    IOUtil.assertFileIsReadable(path);
    try {
        this.channel = Files.newByteChannel(path);
    } catch (IOException e) {
        throw new SAMException("Fasta file should be readable but is not: " + path, e);
    }
    reset();

    if(getSequenceDictionary() != null)
        sanityCheckDictionaryAgainstIndex(path.toAbsolutePath().toString(),sequenceDictionary,index);
}
 
Example #4
Source File: SamBamUtils.java    From chipster with MIT License 6 votes vote down vote up
public void indexBam(File bamFile, File baiFile) {
	SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
       final SamReader bam;

           // input from a normal file
           IOUtil.assertFileIsReadable(bamFile);
           bam = SamReaderFactory.makeDefault().referenceSequence(null)
                   .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS)
                   .open(bamFile);

       if (bam.type() != SamReader.Type.BAM_TYPE) {
           throw new SAMException("Input file must be bam file, not sam file.");
       }

       if (!bam.getFileHeader().getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)) {
           throw new SAMException("Input bam file must be sorted by coordinate");
       }

       BAMIndexer.createIndex(bam, baiFile);

       CloserUtil.close(bam);
}
 
Example #5
Source File: LiftoverVcfTest.java    From picard with MIT License 6 votes vote down vote up
@Test(expectedExceptions = SAMException.class, expectedExceptionsMessageRegExp = "File exists but is not readable:.*")
public void testUnreadableDictionary() throws IOException {
    final Path liftOutput = Files.createTempFile("tmpouput", ".vcf");
    liftOutput.toFile().deleteOnExit();
    final Path rejectOutput = Files.createTempFile("tmpreject", ".vcf");
    rejectOutput.toFile().deleteOnExit();
    final Path input = TEST_DATA_PATH.toPath().resolve("testLiftoverBiallelicIndels.vcf");
    final Path tmpCopyDir = Files.createTempDirectory("copy");
    tmpCopyDir.toFile().deleteOnExit();
    final Path referenceCopy = tmpCopyDir.resolve("refCopy.fasta");
    referenceCopy.toFile().deleteOnExit();
    Files.copy(REFERENCE_FILE.toPath(), referenceCopy);
    final Path dictCopy = referenceCopy.resolveSibling(referenceCopy.toFile().getName().replaceAll("fasta$", "dict"));
    dictCopy.toFile().deleteOnExit();
    final Path dictionary = REFERENCE_FILE.toPath().resolveSibling(REFERENCE_FILE.getName().replaceAll("fasta$", "dict"));
    Files.copy(dictionary, dictCopy);
    dictCopy.toFile().setReadable(false);
    final String[] args = new String[]{
            "INPUT=" + input,
            "OUTPUT=" + liftOutput,
            "REJECT=" + rejectOutput,
            "CHAIN=" + CHAIN_FILE,
            "REFERENCE_SEQUENCE=" + referenceCopy,
    };
    runPicardCommandLine(args);
}
 
Example #6
Source File: CollectMultipleMetricsTest.java    From picard with MIT License 6 votes vote down vote up
@Test(expectedExceptions = {SAMException.class, CommandLineException.class}, dataProvider = "extraArgumentValue")
public void testMultipleMetricsFailing(final String extra) throws IOException {

    final File input = new File(TEST_DATA_DIR, "summary_alignment_stats_test.sam");
    final File reference = new File(TEST_DATA_DIR, "summary_alignment_stats_test.fasta");
    final File outfile = File.createTempFile("alignmentMetrics", "");
    outfile.deleteOnExit();
    final String[] args = new String[]{
            "INPUT=" + input.getAbsolutePath(),
            "OUTPUT=" + outfile.getAbsolutePath(),
            "REFERENCE_SEQUENCE=" + reference.getAbsolutePath(),
            "METRIC_ACCUMULATION_LEVEL=" + MetricAccumulationLevel.ALL_READS.name(),
            "PROGRAM=null",
            "PROGRAM=" + CollectMultipleMetrics.Program.CollectAlignmentSummaryMetrics.name(),
            "PROGRAM=" + CollectMultipleMetrics.Program.CollectInsertSizeMetrics.name(),
            "EXTRA_ARGUMENT=" + extra
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);
}
 
Example #7
Source File: ConvertSequencingArtifactToOxoGTest.java    From picard with MIT License 5 votes vote down vote up
/**
 * Compare the metrics in two files, ignoring headers and histograms.
 */
public static boolean areMetricsEqual(final File file1, final File file2, final String[] columnsToCompare) throws NoSuchFieldException {
    try (Reader reader1 = new FileReader(file1);
         Reader reader2 = new FileReader(file2)) {
        final MetricsFile<MetricBase, ?> mf1 = new MetricsFile<>();
        final MetricsFile<MetricBase, ?> mf2 = new MetricsFile<>();
        mf1.read(reader1);
        mf2.read(reader2);

        return areMetricsEqual(mf1, mf2, columnsToCompare);
    } catch (IOException e) {
        throw new SAMException(e);
    }
}
 
Example #8
Source File: IndexedFastaDataSource.java    From chipster with MIT License 5 votes vote down vote up
public String query(Chromosome chr, Long start, Long end) {
	
	String chrString = chromosomeNameUnnormaliser.unnormalise(chr);
	
	try {
		ReferenceSequence picardSequence = picard.getSubsequenceAt(chrString, start, end);
		return new String(picardSequence.getBases());

	} catch (SAMException e) {				
		e.printStackTrace(); //Catch "Query asks for data past end of contig" to prevent this thread from ending
		return null;
	}		
}
 
Example #9
Source File: MetricsUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Write a {@link MetricsFile} to the given path, can be any destination supported by {@link BucketUtils#createFile(String)}
 * @param metricsFile a {@link MetricsFile} object to write to disk
 * @param metricsOutputPath the path (or uri) to write the metrics to
 */
public static void saveMetrics(final MetricsFile<?, ?> metricsFile, String metricsOutputPath) {
    try(Writer out = new BufferedWriter(new OutputStreamWriter(BucketUtils.createFile(metricsOutputPath)))) {
        metricsFile.write(out);
    } catch (IOException | SAMException e ){
        throw new UserException.CouldNotCreateOutputFile("Could not write metrics to file: " + metricsOutputPath, e);
    }
}
 
Example #10
Source File: BedToIntervalListTest.java    From picard with MIT License 5 votes vote down vote up
private void doTest(final String inputBed, final String header) throws IOException, SAMException {
    final File outputFile  = File.createTempFile("bed_to_interval_list_test.", ".interval_list");
    outputFile.deleteOnExit();
    final BedToIntervalList program = new BedToIntervalList();
    final File inputBedFile = new File(TEST_DATA_DIR, inputBed);
    program.INPUT = inputBedFile;
    program.SEQUENCE_DICTIONARY = new File(TEST_DATA_DIR, header);
    program.OUTPUT = outputFile;
    program.UNIQUE = true;
    program.doWork();

    // Assert they are equal
    IOUtil.assertFilesEqual(new File(inputBedFile.getAbsolutePath() + ".interval_list"), outputFile);
}
 
Example #11
Source File: CollectGcBiasMetricsTest.java    From picard with MIT License 5 votes vote down vote up
@BeforeTest
void setupBuilder() throws IOException {
    tempSamFileChrM_O = VcfTestUtils.createTemporaryIndexedFile("CollectGcBias", ".bam");
    tempSamFileAllChr = VcfTestUtils.createTemporaryIndexedFile("CollectGcBias", ".bam");

    final File tempSamFileUnsorted = VcfTestUtils.createTemporaryIndexedFile("CollectGcBias", ".bam");

    final SAMFileHeader header = new SAMFileHeader();

    try {
        header.setSequenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(dict.toPath()));
        header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
    } catch (final SAMException e) {
        e.printStackTrace();
    }

    //build different levels to put into the same bam file for testing multi level collection
    setupTest1(1, readGroupId1, readGroupRecord1, sample1, library1, header, setBuilder1); //Sample 1, Library 1, RG 1
    setupTest1(2, readGroupId2, readGroupRecord2, sample1, library2, header, setBuilder2); //Sample 1, Library 2, RG 2
    setupTest1(3, readGroupId3, readGroupRecord3, sample2, library3, header, setBuilder3); //Sample 2, Library 3, RG 3

    //build one last readgroup for comparing that window count stays the same whether you use all contigs or not
    setupTest2(1, readGroupId1, readGroupRecord1, sample1, library1, header, setBuilder4);

    final List<SAMRecordSetBuilder> test1Builders = new ArrayList<>();
    test1Builders.add(setBuilder1);
    test1Builders.add(setBuilder2);
    test1Builders.add(setBuilder3);

    final List<SAMRecordSetBuilder> test2Builders = new ArrayList<>();
    test2Builders.add(setBuilder4);

    tempSamFileChrM_O = build(test1Builders, tempSamFileUnsorted, header);
    tempSamFileAllChr = build(test2Builders, tempSamFileUnsorted, header);
}
 
Example #12
Source File: GeneAnnotationReader.java    From Drop-seq with MIT License 5 votes vote down vote up
public static OverlapDetector<Gene> loadAnnotationsFile(final File annotationFile, final SAMSequenceDictionary sequenceDictionary) {

		// trim off the potential compression tags on the file.
		String f = annotationFile.getName();
		if (f.endsWith(".bz2")) f=f.replaceAll("\\.bz2", "");
		if (f.endsWith(".gz")) f=f.replaceAll("\\.gz", "");

		if (f.endsWith(".gtf"))
			return loadGTFFile(annotationFile, sequenceDictionary);
		else if (f.endsWith(".refFlat"))
			return loadRefFlat(annotationFile, sequenceDictionary);

		throw new SAMException("Unable to determine file format for gene annotation file.  Expect [.gtf | .refFlat]");
	}
 
Example #13
Source File: CheckFingerprintTest.java    From picard with MIT License 5 votes vote down vote up
@Test(dataProvider = "badData", expectedExceptions = {MalformedFeatureFile.class, SAMException.class})
public void testBadData(final String inputVcf,
                        final String outputLoc,
                        final String genotypesFile,
                        final File haplotypeFile) {
    String[] args = new String[]{
            "I=" + inputVcf,
            "O=" + outputLoc,
            "G=" + genotypesFile,
            "H=" + haplotypeFile.getAbsolutePath()
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);
}
 
Example #14
Source File: SamToFastqTest.java    From picard with MIT License 5 votes vote down vote up
@Test (dataProvider = "badGroupedFiles", expectedExceptions=SAMException.class)
public void testBadGroupedFileOutputPerRg(final String samFilename) throws IOException {
    convertFile(new String[]{
            "INPUT=" + TEST_DATA_DIR + "/" + samFilename,
            "OUTPUT_DIR=" + IOUtil.getDefaultTmpDir().getAbsolutePath() + "/",
            "OUTPUT_PER_RG=true"
    });
}
 
Example #15
Source File: CheckIlluminaDirectoryTest.java    From picard with MIT License 5 votes vote down vote up
@Test(expectedExceptions = SAMException.class)
public void basedirDoesntExistTest() {
    final String[] args = makeCheckerArgs(new File("a_made_up_file/in_some_weird_location"), 1, "76T76T",
            new IlluminaDataType[]{IlluminaDataType.Position},
            new ArrayList<>(), false, false);
    runPicardCommandLine(args);
}
 
Example #16
Source File: FilterFileReaderTest.java    From picard with MIT License 5 votes vote down vote up
@Test(dataProvider = "failingFilesForSAMException", expectedExceptions = SAMException.class)
public void readInvalidValuesForSAMException(final String failingFile) {
    final FilterFileReader reader = new FilterFileReader(new File(TEST_DATA_DIR, failingFile));
    while(reader.hasNext()) {
        reader.next();
    }
}
 
Example #17
Source File: IntervalListTools.java    From picard with MIT License 5 votes vote down vote up
static IntervalListInputType forFile(final File intervalListExtractable) {
    for (final IntervalListInputType intervalListInputType : IntervalListInputType.values()) {
        for (final String s : intervalListInputType.applicableExtensions) {
            if (intervalListExtractable.getName().endsWith(s)) {
                return intervalListInputType;
            }
        }
    }
    throw new SAMException("Cannot figure out type of file " + intervalListExtractable.getAbsolutePath() + " from extension. Current implementation understands the following types: " + Arrays.toString(IntervalListInputType.values()));
}
 
Example #18
Source File: FastqToSam.java    From picard with MIT License 5 votes vote down vote up
/**
 * Get a list of FASTQs that are sequentially numbered based on the first (base) fastq.
 * The files should be named:
 *   <prefix>_001.<extension>, <prefix>_002.<extension>, ..., <prefix>_XYZ.<extension>
 * The base files should be:
 *   <prefix>_001.<extension>
 * An example would be:
 *   RUNNAME_S8_L005_R1_001.fastq
 *   RUNNAME_S8_L005_R1_002.fastq
 *   RUNNAME_S8_L005_R1_003.fastq
 *   RUNNAME_S8_L005_R1_004.fastq
 * where `baseFastq` is the first in that list.
 */
protected static List<File> getSequentialFileList(final File baseFastq) {
    final List<File> files = new ArrayList<>();
    files.add(baseFastq);

    // Find the correct extension used in the base FASTQ
    FastqExtensions fastqExtensions = null;
    String suffix = null; // store the suffix including the extension
    for (final FastqExtensions ext : FastqExtensions.values()) {
        suffix = "_001" + ext.getExtension();
        if (baseFastq.getAbsolutePath().endsWith(suffix)) {
            fastqExtensions = ext;
            break;
        }
    }
    if (null == fastqExtensions) {
        throw new PicardException(String.format("Could not parse the FASTQ extension (expected '_001' + '%s'): %s", FastqExtensions.values().toString(), baseFastq));
    }

    // Find all the files
    for (int idx = 2; true; idx++) {
        String fastq = baseFastq.getAbsolutePath();
        fastq = String.format("%s_%03d%s", fastq.substring(0, fastq.length() - suffix.length()), idx, fastqExtensions.getExtension());
        try {
            IOUtil.assertFileIsReadable(new File(fastq));
        } catch (final SAMException e) { // the file is not readable, so do not continue
            break;
        }
        files.add(new File(fastq));
    }

    return files;
}
 
Example #19
Source File: SimpleMarkDuplicatesWithMateCigarTest.java    From picard with MIT License 4 votes vote down vote up
/** We require mate cigars for this tool */
@Test(expectedExceptions = SAMException.class)
@Override
public void testTwoMappedPairsWithSoftClippingFirstOfPairOnlyNoMateCigar() {
    super.testTwoMappedPairsWithSoftClippingFirstOfPairOnlyNoMateCigar();
}
 
Example #20
Source File: FastqToSamTest.java    From picard with MIT License 4 votes vote down vote up
@Test(dataProvider = "badFormatFiles", expectedExceptions= SAMException.class)
public void testBadFile(final String filename) throws IOException {
    convertFile(filename, null, FastqQualityFormat.Standard);
}
 
Example #21
Source File: FastqToSamTest.java    From picard with MIT License 4 votes vote down vote up
@Test(dataProvider = "badVersionFiles", expectedExceptions= SAMException.class)
public void testFastqVersionBad(final String fastqVersionFilename, final FastqQualityFormat version) throws IOException {
    convertFile(fastqVersionFilename, version);
}
 
Example #22
Source File: FastqToSamTest.java    From picard with MIT License 4 votes vote down vote up
@Test(dataProvider = "permissiveFormatFiles",expectedExceptions = SAMException.class)
public void testPermissiveFail(final String filename1, final String filename2, final FastqQualityFormat version) throws IOException {
    convertFile(filename1,filename2,version,false);
}
 
Example #23
Source File: FixMateInformationTest.java    From picard with MIT License 4 votes vote down vote up
@Test(expectedExceptions = SAMException.class)
public void ignoreMissingMateExceptionTest() throws IOException {
    missingMateTestHelper(false);
}
 
Example #24
Source File: BedToIntervalListTest.java    From picard with MIT License 4 votes vote down vote up
@Test(dataProvider = "testBedToIntervalListSequenceDictionaryBadDataProvider",
      expectedExceptions = {SAMException.class, PicardException.class})
public void testBedToIntervalListBadSequenceDictionary(final String dictionary) throws IOException {
    doTest("seq_dict_test.bed", dictionary);
}
 
Example #25
Source File: ReadsPathDataSource.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Initialize this data source with multiple SAM/BAM/CRAM files, explicit indices for those files,
 * and a custom SamReaderFactory.
 *
 * @param samPaths paths to SAM/BAM/CRAM files, not null
 * @param samIndices indices for all of the SAM/BAM/CRAM files, in the same order as samPaths. May be null,
 *                   in which case index paths are inferred automatically.
 * @param customSamReaderFactory SamReaderFactory to use, if null a default factory with no reference and validation
 *                               stringency SILENT is used.
 * @param cloudWrapper caching/prefetching wrapper for the data, if on Google Cloud.
 * @param cloudIndexWrapper caching/prefetching wrapper for the index, if on Google Cloud.
 */
public ReadsPathDataSource( final List<Path> samPaths, final List<Path> samIndices,
                           SamReaderFactory customSamReaderFactory,
                           Function<SeekableByteChannel, SeekableByteChannel> cloudWrapper,
                           Function<SeekableByteChannel, SeekableByteChannel> cloudIndexWrapper ) {
    Utils.nonNull(samPaths);
    Utils.nonEmpty(samPaths, "ReadsPathDataSource cannot be created from empty file list");

    if ( samIndices != null && samPaths.size() != samIndices.size() ) {
        throw new UserException(String.format("Must have the same number of BAM/CRAM/SAM paths and indices. Saw %d BAM/CRAM/SAMs but %d indices",
                                              samPaths.size(), samIndices.size()));
    }

    readers = new LinkedHashMap<>(samPaths.size() * 2);
    backingPaths = new LinkedHashMap<>(samPaths.size() * 2);
    indicesAvailable = true;

    final SamReaderFactory samReaderFactory =
            customSamReaderFactory == null ?
                SamReaderFactory.makeDefault().validationStringency(ReadConstants.DEFAULT_READ_VALIDATION_STRINGENCY) :
                customSamReaderFactory;

    int samCount = 0;
    for ( final Path samPath : samPaths ) {
        // Ensure each file can be read
        try {
            IOUtil.assertFileIsReadable(samPath);
        }
        catch ( SAMException|IllegalArgumentException e ) {
            throw new UserException.CouldNotReadInputFile(samPath.toString(), e);
        }

        Function<SeekableByteChannel, SeekableByteChannel> wrapper =
            (BucketUtils.isEligibleForPrefetching(samPath)
                ? cloudWrapper
                : Function.identity());
        // if samIndices==null then we'll guess the index name from the file name.
        // If the file's on the cloud, then the search will only consider locations that are also
        // in the cloud.
        Function<SeekableByteChannel, SeekableByteChannel> indexWrapper =
            ((samIndices != null && BucketUtils.isEligibleForPrefetching(samIndices.get(samCount))
             || (samIndices == null && BucketUtils.isEligibleForPrefetching(samPath)))
                ? cloudIndexWrapper
                : Function.identity());

        SamReader reader;
        if ( samIndices == null ) {
            reader = samReaderFactory.open(samPath, wrapper, indexWrapper);
        }
        else {
            final SamInputResource samResource = SamInputResource.of(samPath, wrapper);
            Path indexPath = samIndices.get(samCount);
            samResource.index(indexPath, indexWrapper);
            reader = samReaderFactory.open(samResource);
        }

        // Ensure that each file has an index
        if ( ! reader.hasIndex() ) {
            indicesAvailable = false;
        }

        readers.put(reader, null);
        backingPaths.put(reader, samPath);
        ++samCount;
    }

    // Prepare a header merger only if we have multiple readers
    headerMerger = samPaths.size() > 1 ? createHeaderMerger() : null;
}
 
Example #26
Source File: CreateSequenceDictionary.java    From picard with MIT License 4 votes vote down vote up
/**
 * Load the file ALT_NAMES containing the alternative contig names
 *
 * @return a <code>Map&lt;src_contig,Set&lt;new_names&gt;&gt;</code>. Never null. May be empty if ALT_NAMES is null.
 * @throws PicardException if there is any error in the file ALT_NAMES
 * @author Pierre Lindenbaum
 */
private Map<String, Set<String>> loadContigAliasesMap() throws PicardException {
    // return an empty map if no mapping file was provided
    if (this.ALT_NAMES == null) {
        return Collections.emptyMap();
    }
    // the map returned by the function
    final Map<String, Set<String>> aliasesByContig = new HashMap<>();
    try {
        for (final String line : IOUtil.slurpLines(this.ALT_NAMES)) {
            if (StringUtil.isBlank(line)) {
                continue;
            }
            final int tab = line.indexOf('\t');
            if (tab == -1) {
                throw new IOException("tabulation missing in " + line);
            }
            final String contigName = line.substring(0, tab);
            final String altName = line.substring(tab + 1);
            // check for empty values
            if (StringUtil.isBlank(contigName)) {
                throw new IOException("empty contig in " + line);
            }
            if (StringUtil.isBlank(altName)) {
                throw new IOException("empty alternative name in " + line);
            }
            if (altName.equals(contigName)) {
                continue;
            }
            try {
                SAMSequenceRecord.validateSequenceName(altName);
            } catch (final SAMException exception) {
                throw new IOException("Illegal alternative reference sequence name in " + line, exception);
            }
            // check alias not previously defined as contig
            if (aliasesByContig.containsKey(altName)) {
                throw new IOException("alternate name " + altName +
                        " previously defined as a contig in " + line);
            }
            // check contig not previously defined as alias
            if (aliasesByContig.keySet().stream()
                    // not an error if defined twice for same contig
                    .filter(K -> !K.equals(contigName))
                    .anyMatch(K -> aliasesByContig.get(K).contains(contigName))) {
                throw new IOException("contig  " + contigName +
                        " previously defined as an alternate name in " + line);
            }
            // add alias
            if (!aliasesByContig.containsKey(contigName)) {
                aliasesByContig.put(contigName, new HashSet<>());
            }
            aliasesByContig.get(contigName).add(altName);
        }
        return aliasesByContig;
    } catch (final IOException e) {
        throw new PicardException("Can't read alias file " + ALT_NAMES, e);
    }
}
 
Example #27
Source File: PicardIndexedFastaSequenceFile.java    From chipster with MIT License 4 votes vote down vote up
/**
 * Gets the subsequence of the contig in the range [start,stop]
 * @param contig Contig whose subsequence to retrieve.
 * @param start inclusive, 1-based start of region.
 * @param stop inclusive, 1-based stop of region.
 * @return The partial reference sequence associated with this range.
 */
public ReferenceSequence getSubsequenceAt( String contig, long start, long stop ) {
    if(start > stop + 1)
        throw new SAMException(String.format("Malformed query; start point %d lies after end point %d",start,stop));

    FastaSequenceIndexEntry indexEntry = index.getIndexEntry(contig);

    if(stop > indexEntry.getSize())
        throw new SAMException("Query asks for data past end of contig");

    int length = (int)(stop - start + 1);

    byte[] target = new byte[length];
    ByteBuffer targetBuffer = ByteBuffer.wrap(target);

    final int basesPerLine = indexEntry.getBasesPerLine();
    final int bytesPerLine = indexEntry.getBytesPerLine();
    final int terminatorLength = bytesPerLine - basesPerLine;

    long startOffset = ((start-1)/basesPerLine)*bytesPerLine + (start-1)%basesPerLine;
    // Cast to long so the second argument cannot overflow a signed integer.
    final long minBufferSize = Math.min((long) Defaults.NON_ZERO_BUFFER_SIZE, (long)(length / basesPerLine + 2) * (long)bytesPerLine);
    if (minBufferSize > Integer.MAX_VALUE) throw new SAMException("Buffer is too large: " +  minBufferSize);

    // Allocate a buffer for reading in sequence data.
    final ByteBuffer channelBuffer = ByteBuffer.allocate((int)minBufferSize);

    while(targetBuffer.position() < length) {
        // If the bufferOffset is currently within the eol characters in the string, push the bufferOffset forward to the next printable character.
        startOffset += Math.max((int)(startOffset%bytesPerLine - basesPerLine + 1),0);

        try {
            startOffset += readFromPosition(channel, channelBuffer, indexEntry.getLocation()+startOffset);
        }
        catch(IOException ex) {
            throw new SAMException("Unable to load " + contig + "(" + start + ", " + stop + ") from " + getAbsolutePath(), ex);
        }

        // Reset the buffer for outbound transfers.
        channelBuffer.flip();

        // Calculate the size of the next run of bases based on the contents we've already retrieved.
        final int positionInContig = (int)start-1+targetBuffer.position();
        final int nextBaseSpan = Math.min(basesPerLine-positionInContig%basesPerLine,length-targetBuffer.position());
        // Cap the bytes to transfer by limiting the nextBaseSpan to the size of the channel buffer.
        int bytesToTransfer = Math.min(nextBaseSpan,channelBuffer.capacity());

        channelBuffer.limit(channelBuffer.position()+bytesToTransfer);

        while(channelBuffer.hasRemaining()) {
            targetBuffer.put(channelBuffer);

            bytesToTransfer = Math.min(basesPerLine,length-targetBuffer.position());
            channelBuffer.limit(Math.min(channelBuffer.position()+bytesToTransfer+terminatorLength,channelBuffer.capacity()));
            channelBuffer.position(Math.min(channelBuffer.position()+terminatorLength,channelBuffer.capacity()));
        }

        // Reset the buffer for inbound transfers.
        channelBuffer.flip();
    }

    return new ReferenceSequence( contig, indexEntry.getSequenceIndex(), target );
}
 
Example #28
Source File: ChipsterIndexedFastaSequenceFile.java    From chipster with MIT License 4 votes vote down vote up
/**
 * Gets the subsequence of the contig in the range [start,stop]
 * @param contig Contig whose subsequence to retrieve.
 * @param start inclusive, 1-based start of region.
 * @param stop inclusive, 1-based stop of region.
 * @return The partial reference sequence associated with this range.
 */
public ReferenceSequence getSubsequenceAt( String contig, long start, long stop ) {
    if(start > stop + 1)
        throw new SAMException(String.format("Malformed query; start point %d lies after end point %d",start,stop));

    FastaSequenceIndexEntry indexEntry = index.getIndexEntry(contig);

    if(stop > indexEntry.getSize())
        throw new SAMException("Query asks for data past end of contig");

    int length = (int)(stop - start + 1);

    byte[] target = new byte[length];
    ByteBuffer targetBuffer = ByteBuffer.wrap(target);

    final int basesPerLine = indexEntry.getBasesPerLine();
    final int bytesPerLine = indexEntry.getBytesPerLine();
    final int terminatorLength = bytesPerLine - basesPerLine;

    long startOffset = ((start-1)/basesPerLine)*bytesPerLine + (start-1)%basesPerLine;
    // Cast to long so the second argument cannot overflow a signed integer.
    final long minBufferSize = Math.min((long) BUFFER_SIZE, (long)(length / basesPerLine + 2) * (long)bytesPerLine);
    if (minBufferSize > Integer.MAX_VALUE) throw new SAMException("Buffer is too large: " +  minBufferSize);

    // Allocate a buffer for reading in sequence data.
    final ByteBuffer channelBuffer = ByteBuffer.allocate((int)minBufferSize);

    while(targetBuffer.position() < length) {
        // If the bufferOffset is currently within the eol characters in the string, push the bufferOffset forward to the next printable character.
        startOffset += Math.max((int)(startOffset%bytesPerLine - basesPerLine + 1),0);

        try {
            //startOffset += readFromPosition(channel, channelBuffer, indexEntry.getLocation()+startOffset);
        	byte[] bytes = channel.read(indexEntry.getLocation() + startOffset, channelBuffer.remaining());
        	channelBuffer.put(bytes);
        	startOffset += bytes.length;
        }
        catch(IOException ex) {
            throw new SAMException("Unable to load " + contig + "(" + start + ", " + stop + ") from " + getAbsolutePath(), ex);
        }

        // Reset the buffer for outbound transfers.
        channelBuffer.flip();

        // Calculate the size of the next run of bases based on the contents we've already retrieved.
        final int positionInContig = (int)start-1+targetBuffer.position();
        final int nextBaseSpan = Math.min(basesPerLine-positionInContig%basesPerLine,length-targetBuffer.position());
        // Cap the bytes to transfer by limiting the nextBaseSpan to the size of the channel buffer.
        int bytesToTransfer = Math.min(nextBaseSpan,channelBuffer.capacity());

        channelBuffer.limit(channelBuffer.position()+bytesToTransfer);

        while(channelBuffer.hasRemaining()) {
            targetBuffer.put(channelBuffer);

            bytesToTransfer = Math.min(basesPerLine,length-targetBuffer.position());
            channelBuffer.limit(Math.min(channelBuffer.position()+bytesToTransfer+terminatorLength,channelBuffer.capacity()));
            channelBuffer.position(Math.min(channelBuffer.position()+terminatorLength,channelBuffer.capacity()));
        }

        // Reset the buffer for inbound transfers.
        channelBuffer.flip();
    }

    return new ReferenceSequence( contig, indexEntry.getSequenceIndex(), target );
}
 
Example #29
Source File: ReferenceSequenceFromSeekable.java    From cramtools with Apache License 2.0 4 votes vote down vote up
public byte[] getSubsequenceAt(String contig, long start, long stop) {
	if (start > stop + 1)
		throw new SAMException(
				String.format("Malformed query; start point %d lies after end point %d", start, stop));

	FastaSequenceIndexEntry indexEntry = index.get(contig);

	if (stop > indexEntry.getSize())
		throw new SAMException("Query asks for data past end of contig");

	int length = (int) (stop - start + 1);

	byte[] target = new byte[length];
	ByteBuffer targetBuffer = ByteBuffer.wrap(target);

	final int basesPerLine = indexEntry.getBasesPerLine();
	final int bytesPerLine = indexEntry.getBytesPerLine();
	final int terminatorLength = bytesPerLine - basesPerLine;

	long startOffset = ((start - 1) / basesPerLine) * bytesPerLine + (start - 1) % basesPerLine;

	// Allocate a 128K buffer for reading in sequence data.
	ByteBuffer channelBuffer = ByteBuffer.allocate(BUFFER_SIZE);

	while (targetBuffer.position() < length) {
		// If the bufferOffset is currently within the eol characters in the
		// string, push the bufferOffset forward to the next printable
		// character.
		startOffset += Math.max((int) (startOffset % bytesPerLine - basesPerLine + 1), 0);

		try {
			s.seek(indexEntry.getLocation() + startOffset);
			startOffset += Utils.readInto(channelBuffer, s);
		} catch (IOException ex) {
			throw new SAMException("Unable to load " + contig + "(" + start + ", " + stop + ") from "
					+ s.getSource());
		}

		// Reset the buffer for outbound transfers.
		channelBuffer.flip();

		// Calculate the size of the next run of bases based on the contents
		// we've already retrieved.
		final int positionInContig = (int) start - 1 + targetBuffer.position();
		final int nextBaseSpan = Math.min(basesPerLine - positionInContig % basesPerLine,
				length - targetBuffer.position());
		// Cap the bytes to transfer by limiting the nextBaseSpan to the
		// size of the channel buffer.
		int bytesToTransfer = Math.min(nextBaseSpan, channelBuffer.capacity());

		channelBuffer.limit(channelBuffer.position() + bytesToTransfer);

		while (channelBuffer.hasRemaining()) {
			targetBuffer.put(channelBuffer);

			bytesToTransfer = Math.min(basesPerLine, length - targetBuffer.position());
			channelBuffer.limit(Math.min(channelBuffer.position() + bytesToTransfer + terminatorLength,
					channelBuffer.capacity()));
			channelBuffer.position(Math.min(channelBuffer.position() + terminatorLength, channelBuffer.capacity()));
		}

		// Reset the buffer for inbound transfers.
		channelBuffer.flip();
	}

	return target;
}