htsjdk.samtools.SamInputResource Java Examples

The following examples show how to use htsjdk.samtools.SamInputResource. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
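Before the project examples, here is a minimal, self-contained sketch of the typical pattern: wrap an input (a file, stream, or URL) in a SamInputResource, optionally attach an explicit index, and open it with a SamReaderFactory. The file names reads.bam and reads.bai are placeholders and do not come from any of the projects below.

import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamInputResource;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;

import java.io.File;
import java.io.IOException;

public class SamInputResourceDemo {
    public static void main(String[] args) throws IOException {
        // Placeholder paths; substitute your own BAM and index files.
        File bam = new File("reads.bam");
        File bai = new File("reads.bai");

        SamReaderFactory factory = SamReaderFactory.makeDefault()
                .validationStringency(ValidationStringency.LENIENT);

        // Wrap the BAM in a resource and attach an explicit index.
        SamInputResource resource = SamInputResource.of(bam).index(bai);

        try (SamReader reader = factory.open(resource)) {
            for (SAMRecord record : reader) {
                System.out.println(record.getReadName());
            }
        }
    }
}
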
Example #1
Source File: BAMRecordReader.java    From Hadoop-BAM with MIT License
private SamReader createSamReader(SeekableStream in, SeekableStream inIndex,
		ValidationStringency stringency, boolean useIntelInflater) {
	SamReaderFactory readerFactory = SamReaderFactory.makeDefault()
			.setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, true)
			.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
			.setUseAsyncIo(false);
	if (stringency != null) {
		readerFactory.validationStringency(stringency);
	}
	SamInputResource resource = SamInputResource.of(in);
	if (inIndex != null) {
		resource.index(inIndex);
	}
	if (useIntelInflater) {
		readerFactory.inflaterFactory(IntelGKLAccessor.newInflatorFactor());
	}
	return readerFactory.open(resource);
}
 
Example #2
Source File: SAMHeaderReader.java    From Hadoop-BAM with MIT License
/** Does not close the stream. */
public static SAMFileHeader readSAMHeaderFrom(
	final InputStream in, final Configuration conf)
{
	final ValidationStringency
		stringency = getValidationStringency(conf);
	SamReaderFactory readerFactory = SamReaderFactory.makeDefault()
			.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
			.setUseAsyncIo(false);
	if (stringency != null) {
		readerFactory.validationStringency(stringency);
	}

	final ReferenceSource refSource = getReferenceSource(conf);
	if (null != refSource) {
		readerFactory.referenceSource(refSource);
	}
	return readerFactory.open(SamInputResource.of(in)).getFileHeader();
}
 
Example #3
Source File: BAMIO.java    From dataflow-java with Apache License 2.0
private static SamReader openBAMReader(SamInputResource resource, ValidationStringency stringency, boolean includeFileSource, long offset) throws IOException {
  SamReaderFactory samReaderFactory = SamReaderFactory
      .makeDefault()
      .validationStringency(stringency)
      .enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES);
  if (includeFileSource) {
    samReaderFactory.enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS);
  }
  if (offset == 0) {
    return samReaderFactory.open(resource);
  }
  LOG.info("Initializing seeking reader with the offset of " + offset);
  SeekingBAMFileReader primitiveReader = new SeekingBAMFileReader(resource,
      false,
      stringency,
      DefaultSAMRecordFactory.getInstance(),
      offset);
  final SeekingReaderAdapter reader =
      new SeekingReaderAdapter(primitiveReader, resource);
  samReaderFactory.reapplyOptions(reader);
  return reader;
}
 
Example #4
Source File: BamSlicerApplication.java    From hmftools with GNU General Public License v3.0
private static void sliceFromURLs(@NotNull URL indexUrl, @NotNull URL bamUrl, @NotNull CommandLine cmd) throws IOException {
    File indexFile = downloadIndex(indexUrl);
    indexFile.deleteOnExit();

    SamReader reader = createFromCommandLine(cmd).open(SamInputResource.of(bamUrl).index(indexFile));

    BAMIndex bamIndex;
    if (indexFile.getPath().contains(".crai")) {
        SeekableStream craiIndex = CRAIIndex.openCraiFileAsBaiStream(indexFile, reader.getFileHeader().getSequenceDictionary());
        bamIndex = new DiskBasedBAMFileIndex(craiIndex, reader.getFileHeader().getSequenceDictionary());
    } else {
        bamIndex = new DiskBasedBAMFileIndex(indexFile, reader.getFileHeader().getSequenceDictionary(), false);
    }

    Optional<Pair<QueryInterval[], BAMFileSpan>> queryIntervalsAndSpan = queryIntervalsAndSpan(reader, bamIndex, cmd);
    Optional<Chunk> unmappedChunk = getUnmappedChunk(bamIndex, HttpUtils.getHeaderField(bamUrl, "Content-Length"), cmd);
    List<Chunk> sliceChunks = sliceChunks(queryIntervalsAndSpan, unmappedChunk);
    SamReader cachingReader = createCachingReader(indexFile, bamUrl, cmd, sliceChunks);

    SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true)
            .makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)));

    queryIntervalsAndSpan.ifPresent(pair -> {
        LOGGER.info("Slicing bam on bed regions...");
        CloseableIterator<SAMRecord> bedIterator = getIterator(cachingReader, pair.getKey(), pair.getValue().toCoordinateArray());
        writeToSlice(writer, bedIterator);
        LOGGER.info("Done writing bed slices.");
    });

    unmappedChunk.ifPresent(chunk -> {
        LOGGER.info("Slicing unmapped reads...");
        CloseableIterator<SAMRecord> unmappedIterator = cachingReader.queryUnmapped();
        writeToSlice(writer, unmappedIterator);
        LOGGER.info("Done writing unmapped reads.");
    });

    reader.close();
    writer.close();
    cachingReader.close();
}
 
Example #5
Source File: BamSlicerApplication.java    From hmftools with GNU General Public License v3.0
@NotNull
private static SamReader createCachingReader(@NotNull File indexFile, @NotNull URL bamUrl, @NotNull CommandLine cmd,
        @NotNull List<Chunk> sliceChunks) throws IOException {
    OkHttpClient httpClient =
            SlicerHttpClient.create(Integer.parseInt(cmd.getOptionValue(MAX_CONCURRENT_REQUESTS, MAX_CONCURRENT_REQUESTS_DEFAULT)));
    int maxBufferSize = readMaxBufferSize(cmd);

    SamInputResource bamResource =
            SamInputResource.of(new CachingSeekableHTTPStream(httpClient, bamUrl, sliceChunks, maxBufferSize)).index(indexFile);
    SamReaderFactory readerFactory = createFromCommandLine(cmd);

    return readerFactory.open(bamResource);
}
 
Example #6
Source File: SamUtils.java    From rtg-tools with BSD 2-Clause "Simplified" License
/**
 * Entry point for specifically creating a SamReader given a pre-positioned stream, header, and known type
 * @param stream the stream to read from. Must already be performing decompression if required.
 * @param reference the SequencesReader to be used as the reference (required for CRAM files).
 * @param headerOverride the pre-determined SAM header
 * @param assumeType the type of input to assume.
 * @return the SamReader
 * @throws IOException if an I/O problem occurs opening the file
 */
public static SamReader makeSamReader(InputStream stream, SequencesReader reference, SAMFileHeader headerOverride, SamReader.Type assumeType) throws IOException {
  if (assumeType == null) {
    throw new NullPointerException();
  }
  try {
    return getSamReaderFactory(reference)
      .open(SamInputResource.of(stream).header(headerOverride).assumeType(assumeType));
  } catch (final RuntimeIOException e) {
    throw (IOException) e.getCause();
  }
}
 
Example #7
Source File: SAMRecordReader.java    From Hadoop-BAM with MIT License
private SamReader createSamReader(InputStream in, ValidationStringency stringency) {
	SamReaderFactory readerFactory = SamReaderFactory.makeDefault()
			.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
			.setUseAsyncIo(false);
	if (stringency != null) {
		readerFactory.validationStringency(stringency);
	}
	return readerFactory.open(SamInputResource.of(in));
}
 
Example #8
Source File: BAMIO.java    From dataflow-java with Apache License 2.0
private static SamInputResource openBAMFile(Storage.Objects storageClient, String gcsStoragePath, SeekableStream index) throws IOException {
  SeekableGCSStream s = new SeekableGCSStream(storageClient, gcsStoragePath);
  SamInputResource samInputResource =
      SamInputResource.of(s);

  if (index != null) {
    samInputResource.index(index);
  }

  LOG.info("getReadsFromBAMFile - got input resources");
  return samInputResource;
}
 
Example #9
Source File: ViewSam.java    From picard with MIT License
/**
 * This is factored out of doWork only for unit testing.
 */
int writeSamText(PrintStream printStream) {
    try {
        final CloseableIterator<SAMRecord> samRecordsIterator;
        final SamReader samReader = SamReaderFactory.makeDefault()
                .referenceSequence(REFERENCE_SEQUENCE)
                .open(SamInputResource.of(INPUT));

        // if we are only using the header or we aren't using intervals, then use the reader as the iterator.
        // otherwise use the SamRecordIntervalIteratorFactory to make an interval-ed iterator
        if (HEADER_ONLY || INTERVAL_LIST == null) {
            samRecordsIterator = samReader.iterator();
        } else {
            IOUtil.assertFileIsReadable(INTERVAL_LIST);

            final List<Interval> intervals = IntervalList.fromFile(INTERVAL_LIST).uniqued().getIntervals();
            samRecordsIterator = new SamRecordIntervalIteratorFactory().makeSamRecordIntervalIterator(samReader, intervals, samReader.hasIndex());
        }
        final AsciiWriter writer = new AsciiWriter(printStream);
        final SAMFileHeader header = samReader.getFileHeader();
        if (!RECORDS_ONLY) {
            if (header.getTextHeader() != null) {
                writer.write(header.getTextHeader());
            } else {
                // Headers that are too large are not retained as text, so need to regenerate text
                new SAMTextHeaderCodec().encode(writer, header, true);
            }
        }
        if (!HEADER_ONLY) {
            while (samRecordsIterator.hasNext()) {
                final SAMRecord rec = samRecordsIterator.next();

                if (printStream.checkError()) {
                    return 1;
                }

                if (this.ALIGNMENT_STATUS == AlignmentStatus.Aligned && rec.getReadUnmappedFlag()) continue;
                if (this.ALIGNMENT_STATUS == AlignmentStatus.Unaligned && !rec.getReadUnmappedFlag()) continue;

                if (this.PF_STATUS == PfStatus.PF && rec.getReadFailsVendorQualityCheckFlag()) continue;
                if (this.PF_STATUS == PfStatus.NonPF && !rec.getReadFailsVendorQualityCheckFlag()) continue;
                writer.write(rec.getSAMString());
            }
        }
        writer.flush();
        if (printStream.checkError()) {
            return 1;
        }
        CloserUtil.close(writer);
        CloserUtil.close(samRecordsIterator);
        return 0;
    } catch (IOException e) {
        throw new PicardException("Exception writing SAM text", e);
    }
}
 
Example #10
Source File: AddOrReplaceReadGroups.java    From picard with MIT License
protected int doWork() {
    IOUtil.assertInputIsValid(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SamReader in = SamReaderFactory.makeDefault()
        .referenceSequence(REFERENCE_SEQUENCE)
        .open(SamInputResource.of(INPUT));

    // create the read-group we'll be using
    final SAMReadGroupRecord rg = new SAMReadGroupRecord(RGID);
    rg.setLibrary(RGLB);
    rg.setPlatform(RGPL);
    rg.setSample(RGSM);
    rg.setPlatformUnit(RGPU);
    if (RGCN != null) rg.setSequencingCenter(RGCN);
    if (RGDS != null) rg.setDescription(RGDS);
    if (RGDT != null) rg.setRunDate(RGDT);
    if (RGPI != null) rg.setPredictedMedianInsertSize(RGPI);
    if (RGPG != null) rg.setProgramGroup(RGPG);
    if (RGPM != null) rg.setPlatformModel(RGPM);
    if (RGKS != null) rg.setKeySequence(RGKS);
    if (RGFO != null) rg.setFlowOrder(RGFO);

    log.info(String.format("Created read-group ID=%s PL=%s LB=%s SM=%s%n", rg.getId(), rg.getPlatform(), rg.getLibrary(), rg.getSample()));

    // create the new header and output file
    final SAMFileHeader inHeader = in.getFileHeader();
    final SAMFileHeader outHeader = inHeader.clone();
    outHeader.setReadGroups(Collections.singletonList(rg));
    if (SORT_ORDER != null) outHeader.setSortOrder(SORT_ORDER);

    final SAMFileWriter outWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader,
            outHeader.getSortOrder() == inHeader.getSortOrder(),
            OUTPUT);

    final ProgressLogger progress = new ProgressLogger(log);
    for (final SAMRecord read : in) {
        read.setAttribute(SAMTag.RG.name(), RGID);
        outWriter.addAlignment(read);
        progress.record(read);
    }

    // cleanup
    CloserUtil.close(in);
    outWriter.close();
    return 0;
}
 
Example #11
Source File: BAMIO.java    From dataflow-java with Apache License 2.0
public SeekingReaderAdapter(SeekingBAMFileReader reader, SamInputResource resource) {
    super(reader, resource);
    underlyingReader = reader;
}
 
Example #12
Source File: ReadsPathDataSource.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Initialize this data source with multiple SAM/BAM/CRAM files, explicit indices for those files,
 * and a custom SamReaderFactory.
 *
 * @param samPaths paths to SAM/BAM/CRAM files, not null
 * @param samIndices indices for all of the SAM/BAM/CRAM files, in the same order as samPaths. May be null,
 *                   in which case index paths are inferred automatically.
 * @param customSamReaderFactory SamReaderFactory to use, if null a default factory with no reference and validation
 *                               stringency SILENT is used.
 * @param cloudWrapper caching/prefetching wrapper for the data, if on Google Cloud.
 * @param cloudIndexWrapper caching/prefetching wrapper for the index, if on Google Cloud.
 */
public ReadsPathDataSource( final List<Path> samPaths, final List<Path> samIndices,
                           SamReaderFactory customSamReaderFactory,
                           Function<SeekableByteChannel, SeekableByteChannel> cloudWrapper,
                           Function<SeekableByteChannel, SeekableByteChannel> cloudIndexWrapper ) {
    Utils.nonNull(samPaths);
    Utils.nonEmpty(samPaths, "ReadsPathDataSource cannot be created from empty file list");

    if ( samIndices != null && samPaths.size() != samIndices.size() ) {
        throw new UserException(String.format("Must have the same number of BAM/CRAM/SAM paths and indices. Saw %d BAM/CRAM/SAMs but %d indices",
                                              samPaths.size(), samIndices.size()));
    }

    readers = new LinkedHashMap<>(samPaths.size() * 2);
    backingPaths = new LinkedHashMap<>(samPaths.size() * 2);
    indicesAvailable = true;

    final SamReaderFactory samReaderFactory =
            customSamReaderFactory == null ?
                SamReaderFactory.makeDefault().validationStringency(ReadConstants.DEFAULT_READ_VALIDATION_STRINGENCY) :
                customSamReaderFactory;

    int samCount = 0;
    for ( final Path samPath : samPaths ) {
        // Ensure each file can be read
        try {
            IOUtil.assertFileIsReadable(samPath);
        }
        catch ( SAMException|IllegalArgumentException e ) {
            throw new UserException.CouldNotReadInputFile(samPath.toString(), e);
        }

        Function<SeekableByteChannel, SeekableByteChannel> wrapper =
            (BucketUtils.isEligibleForPrefetching(samPath)
                ? cloudWrapper
                : Function.identity());
        // if samIndices==null then we'll guess the index name from the file name.
        // If the file's on the cloud, then the search will only consider locations that are also
        // in the cloud.
        Function<SeekableByteChannel, SeekableByteChannel> indexWrapper =
            ((samIndices != null && BucketUtils.isEligibleForPrefetching(samIndices.get(samCount))
             || (samIndices == null && BucketUtils.isEligibleForPrefetching(samPath)))
                ? cloudIndexWrapper
                : Function.identity());

        SamReader reader;
        if ( samIndices == null ) {
            reader = samReaderFactory.open(samPath, wrapper, indexWrapper);
        }
        else {
            final SamInputResource samResource = SamInputResource.of(samPath, wrapper);
            Path indexPath = samIndices.get(samCount);
            samResource.index(indexPath, indexWrapper);
            reader = samReaderFactory.open(samResource);
        }

        // Ensure that each file has an index
        if ( ! reader.hasIndex() ) {
            indicesAvailable = false;
        }

        readers.put(reader, null);
        backingPaths.put(reader, samPath);
        ++samCount;
    }

    // Prepare a header merger only if we have multiple readers
    headerMerger = samPaths.size() > 1 ? createHeaderMerger() : null;
}
 
Example #13
Source File: Transcode.java    From cramtools with Apache License 2.0
public static void main(String[] args) throws IOException {
	Params params = new Params();
	JCommander jc = new JCommander(params);
	jc.parse(args);

	Log.setGlobalLogLevel(params.logLevel);

	if (args.length == 0 || params.help) {
		usage(jc);
		System.exit(1);
	}

	if (params.reference == null) {
		System.out.println("Reference file not found, will try downloading...");
	}

	ReferenceSource referenceSource = null;
	if (params.reference != null) {
		System.setProperty("reference", params.reference.getAbsolutePath());
		referenceSource = new ReferenceSource(params.reference);
	} else {
		String prop = System.getProperty("reference");
		if (prop != null) {
			referenceSource = new ReferenceSource(new File(prop));
		}
	}

	SamReaderFactory factory = SamReaderFactory.make().validationStringency(params.validationLevel);
	SamInputResource r;
	if ("file".equalsIgnoreCase(params.url.getProtocol()))
		r = SamInputResource.of(params.url.getPath());
	else
		r = SamInputResource.of(params.url);
	SamReader reader = factory.open(r);
	SAMRecordIterator iterator = reader.iterator();

	SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
	SAMFileWriter writer = null;
	OutputStream os = new BufferedOutputStream(new FileOutputStream(params.outputFile));
	switch (params.outputFormat) {
	case BAM:
		writer = writerFactory.makeBAMWriter(reader.getFileHeader(),
				reader.getFileHeader().getSortOrder() == SortOrder.coordinate, os);
		break;
	case CRAM:
		writer = writerFactory.makeCRAMWriter(reader.getFileHeader(), os, params.reference);
		break;

	default:
		System.out.println("Unknown output format: " + params.outputFormat);
		System.exit(1);
	}

	while (iterator.hasNext()) {
		writer.addAlignment(iterator.next());
	}
	writer.close();
	reader.close();
}
 
Example #14
Source File: SamUtils.java    From rtg-tools with BSD 2-Clause "Simplified" License
/**
 * Entry point for specifically creating a SamReader given a provided stream and header, but let
 * htsjdk decide the underlying format (including working out whether the input is compressed).
 * @param stream the stream to read from
 * @param reference the SequencesReader to be used as the reference (required for CRAM files).
 * @param headerOverride the pre-determined SAM header (or null to use the header from the stream)
 * @return the SamReader
 * @throws IOException if an I/O problem occurs opening the file
 */
public static SamReader makeSamReader(InputStream stream, SequencesReader reference, SAMFileHeader headerOverride) throws IOException {
  try {
    return getSamReaderFactory(reference).open(SamInputResource.of(stream).header(headerOverride));
  } catch (final RuntimeIOException e) {
    throw (IOException) e.getCause();
  }
}