htsjdk.samtools.BAMIndex Java Examples

The following examples show how to use htsjdk.samtools.BAMIndex. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BamSlicerApplication.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds the chunk used to fetch the unmapped reads of a BAM, when the UNMAPPED option is set.
 *
 * @param bamIndex index asked for the start of its last linear bin
 * @param contentLengthString textual content length of the BAM; may be null
 * @param cmd parsed command line
 * @return a chunk running from the start of the last linear bin to the content length shifted
 *         left by 16 bits; empty when UNMAPPED is not set, the index reports -1, the content
 *         length is null, or it cannot be parsed as a long
 */
@NotNull
private static Optional<Chunk> getUnmappedChunk(@NotNull BAMIndex bamIndex, @Nullable String contentLengthString,
        @NotNull CommandLine cmd) {
    if (!cmd.hasOption(UNMAPPED)) {
        return Optional.empty();
    }

    final long lastLinearBinStart = bamIndex.getStartOfLastLinearBin();
    if (lastLinearBinStart == -1) {
        LOGGER.warn("Start of last linear bin was -1. No mapped reads found in BAM.");
        return Optional.empty();
    }

    if (contentLengthString == null) {
        return Optional.empty();
    }

    try {
        final long parsedLength = Long.parseLong(contentLengthString);
        // The length is scaled by 2^16; presumably the chunk end is expressed in 64Kb units - confirm.
        final Chunk unmappedTail = new Chunk(lastLinearBinStart, parsedLength << 16);
        return Optional.of(unmappedTail);
    } catch (NumberFormatException ignored) {
        LOGGER.error("Invalid content length ({}) for bam URL", contentLengthString);
        return Optional.empty();
    }
}
 
Example #2
Source File: MergeSamFilesTest.java    From picard with MIT License 6 votes vote down vote up
/**
 * Confirm that unsorted input can result in coordinate sorted output, with index created.
 *
 * Merges two SAM files from the "unsorted_input" test directory into a temporary BAM with
 * CREATE_INDEX=true and SO=coordinate, then checks the exit status, the header sort order,
 * the presence of an index and the validity of the merged file.
 */
@Test
public void unsortedInputSortedOutputTest() throws Exception {
    final File unsortedInputTestDataDir = new File(TEST_DATA_DIR, "unsorted_input");
    final File mergedOutput = File.createTempFile("unsortedInputSortedOutputTest.", BamFileIoUtils.BAM_FILE_EXTENSION);
    mergedOutput.deleteOnExit();
    final File mergedOutputIndex = new File(mergedOutput.getParent(), IOUtil.basename(mergedOutput)+ BAMIndex.BAI_INDEX_SUFFIX);
    mergedOutputIndex.deleteOnExit();

    final String[] args = {
            "I=" + new File(unsortedInputTestDataDir, "1.sam").getAbsolutePath(),
            "I=" + new File(unsortedInputTestDataDir, "2.sam").getAbsolutePath(),
            "O=" + mergedOutput.getAbsolutePath(),
            "CREATE_INDEX=true",
            "SO=coordinate"
    };
    final int mergeExitStatus = runPicardCommandLine(args);
    Assert.assertEquals(mergeExitStatus, 0);
    final SamReader reader = SamReaderFactory.makeDefault().open(mergedOutput);
    try {
        Assert.assertEquals(reader.getFileHeader().getSortOrder(), SAMFileHeader.SortOrder.coordinate);
        Assert.assertTrue(reader.hasIndex());
        new ValidateSamTester().assertSamValid(mergedOutput);
        Assert.assertTrue(mergedOutputIndex.delete());
    } finally {
        // Release the reader even when one of the assertions above fails.
        CloserUtil.close(reader);
    }
}
 
Example #3
Source File: TestBAMInputFormat.java    From Hadoop-BAM with MIT License 6 votes vote down vote up
/**
 * Checks BAI-based split calculation when the index sits at the BAM path with the index
 * suffix appended, rather than at the path with the ".bam" extension replaced.
 */
@Test
public void testMultipleSplitsBaiEnabledSuffixPath() throws Exception {
  input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate)
      .getAbsolutePath();
  // Move the index from "<name><suffix>" (".bam" replaced) to "<name>.bam<suffix>" (suffix appended).
  File index = new File(input.replaceFirst("\\.bam$", BAMIndex.BAMIndexSuffix));
  File suffixedIndex = new File(input + BAMIndex.BAMIndexSuffix);
  // File.renameTo reports failure through its return value only; fail fast so the test
  // never silently runs against the wrong index location.
  if (!index.renameTo(suffixedIndex)) {
    throw new IllegalStateException("Could not rename " + index + " to " + suffixedIndex);
  }
  completeSetup();
  BAMInputFormat.setEnableBAISplitCalculator(jobContext.getConfiguration(), true);
  jobContext.getConfiguration().setInt(FileInputFormat.SPLIT_MAXSIZE, 40000);
  BAMInputFormat inputFormat = new BAMInputFormat();
  List<InputSplit> splits = inputFormat.getSplits(jobContext);
  assertEquals(3, splits.size());
  List<SAMRecord> split0Records = getSAMRecordsFromSplit(inputFormat, splits.get(0));
  List<SAMRecord> split1Records = getSAMRecordsFromSplit(inputFormat, splits.get(1));
  List<SAMRecord> split2Records = getSAMRecordsFromSplit(inputFormat, splits.get(2));
  assertEquals(1080, split0Records.size());
  assertEquals(524, split1Records.size());
  assertEquals(398, split2Records.size());
}
 
Example #4
Source File: BamSlicerApplication.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Slices a remote BAM into the file named by the OUTPUT option, using a downloaded index.
 *
 * The index is downloaded to a local file, the query intervals (BED option) and the unmapped
 * chunk (UNMAPPED option) are resolved against it, and the matching records are read through
 * a caching reader and written to a new indexed BAM.
 *
 * @param indexUrl location of the index to download
 * @param bamUrl location of the BAM to slice
 * @param cmd parsed command line supplying the reader options, BED, UNMAPPED and OUTPUT values
 * @throws IOException if downloading, reading or closing a resource fails
 */
private static void sliceFromURLs(@NotNull URL indexUrl, @NotNull URL bamUrl, @NotNull CommandLine cmd) throws IOException {
    File indexFile = downloadIndex(indexUrl);
    indexFile.deleteOnExit();

    // try-with-resources guarantees the readers and the writer are released even when slicing fails part-way.
    try (SamReader reader = createFromCommandLine(cmd).open(SamInputResource.of(bamUrl).index(indexFile))) {
        BAMIndex bamIndex;
        // Any index path containing ".crai" is treated as a CRAI index and read through a BAI-style stream.
        if (indexFile.getPath().contains(".crai")) {
            SeekableStream craiIndex = CRAIIndex.openCraiFileAsBaiStream(indexFile, reader.getFileHeader().getSequenceDictionary());
            bamIndex = new DiskBasedBAMFileIndex(craiIndex, reader.getFileHeader().getSequenceDictionary());
        } else {
            bamIndex = new DiskBasedBAMFileIndex(indexFile, reader.getFileHeader().getSequenceDictionary(), false);
        }

        Optional<Pair<QueryInterval[], BAMFileSpan>> queryIntervalsAndSpan = queryIntervalsAndSpan(reader, bamIndex, cmd);
        Optional<Chunk> unmappedChunk = getUnmappedChunk(bamIndex, HttpUtils.getHeaderField(bamUrl, "Content-Length"), cmd);
        List<Chunk> sliceChunks = sliceChunks(queryIntervalsAndSpan, unmappedChunk);

        try (SamReader cachingReader = createCachingReader(indexFile, bamUrl, cmd, sliceChunks);
                SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true)
                        .makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)))) {

            queryIntervalsAndSpan.ifPresent(pair -> {
                LOGGER.info("Slicing bam on bed regions...");
                CloseableIterator<SAMRecord> bedIterator = getIterator(cachingReader, pair.getKey(), pair.getValue().toCoordinateArray());
                writeToSlice(writer, bedIterator);
                LOGGER.info("Done writing bed slices.");
            });

            unmappedChunk.ifPresent(chunk -> {
                LOGGER.info("Slicing unmapped reads...");
                CloseableIterator<SAMRecord> unmappedIterator = cachingReader.queryUnmapped();
                writeToSlice(writer, unmappedIterator);
                LOGGER.info("Done writing unmapped reads.");
            });
        }
    }
}
 
Example #5
Source File: BamSlicerApplication.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Resolves the query intervals listed in the BED option together with the file span they cover.
 *
 * @param reader reader whose file header is used when reading the BED intervals
 * @param bamIndex index used to compute the file span for the intervals
 * @param cmd parsed command line
 * @return the intervals paired with their file span, or empty when the BED option is not set
 * @throws IOException declared for the BED-reading step
 */
@NotNull
private static Optional<Pair<QueryInterval[], BAMFileSpan>> queryIntervalsAndSpan(@NotNull SamReader reader, @NotNull BAMIndex bamIndex,
        @NotNull CommandLine cmd) throws IOException {
    if (!cmd.hasOption(BED)) {
        return Optional.empty();
    }

    final String bedFile = cmd.getOptionValue(BED);
    LOGGER.info("Reading query intervals from BED file: {}", bedFile);
    final QueryInterval[] bedIntervals = getIntervalsFromBED(bedFile, reader.getFileHeader());
    return Optional.of(Pair.of(bedIntervals, BAMFileReader.getFileSpan(bedIntervals, bamIndex)));
}
 
Example #6
Source File: BamIndexStats.java    From picard with MIT License 5 votes vote down vote up
/**
 * Entry point of the program. Warns when INPUT is named like a BAM index file, asserts
 * that INPUT is readable, then hands it to {@code BAMIndexMetaData.printIndexStats}.
 *
 * @return always 0
 */
protected int doWork() {
    final boolean namedLikeIndex = INPUT.getName().endsWith(BAMIndex.BAMIndexSuffix);
    if (namedLikeIndex) {
        log.warn("INPUT should be the BAM file name, not its index file");
    }

    IOUtil.assertFileIsReadable(INPUT);
    BAMIndexMetaData.printIndexStats(INPUT);
    return 0;
}
 
Example #7
Source File: BAMInputFormat.java    From Hadoop-BAM with MIT License 5 votes vote down vote up
/**
 * Returns the splits whose file name ends with neither the splitting-index extension
 * nor the BAM index suffix. Every split is cast to {@code FileSplit}.
 */
static List<InputSplit> removeIndexFiles(List<InputSplit> splits) {
  return splits.stream()
      .filter(split -> {
        final String fileName = ((FileSplit) split).getPath().getName();
        final boolean isIndexFile = fileName.endsWith(SplittingBAMIndexer.OUTPUT_FILE_EXTENSION)
            || fileName.endsWith(BAMIndex.BAMIndexSuffix);
        return !isIndexFile;
      })
      .collect(Collectors.toList());
}
 
Example #8
Source File: BAMInputFormat.java    From Hadoop-BAM with MIT License 4 votes vote down vote up
/**
 * Derives the BAI index path for a BAM path by applying {@code BAMIndex.BAMIndexSuffix}
 * through {@code Path#suffix}.
 */
static Path getBAIPath(Path path) {
	final String indexSuffix = BAMIndex.BAMIndexSuffix;
	return path.suffix(indexSuffix);
}
 
Example #9
Source File: BAMTestUtil.java    From Hadoop-BAM with MIT License 4 votes vote down vote up
/**
 * Writes a temporary BAM file of synthetic reads and, for coordinate order, its index.
 *
 * One read pair is added per iteration, except at iteration 5 where two fragments are added
 * instead. When {@code numPairs} is positive, two extra unmapped fragments are appended.
 * The temporary file is registered for deletion on JVM exit.
 *
 * @param numPairs number of loop iterations used to generate reads
 * @param sortOrder sort order given to the record builder; an index is only created for
 *                  {@code coordinate}
 * @return the temporary BAM file
 * @throws IOException if the temporary file cannot be created or a reader fails to close
 */
public static File writeBamFile(int numPairs, SAMFileHeader.SortOrder sortOrder)
    throws IOException {
  // file will be both queryname and coordinate sorted, so use one or the other
  SAMRecordSetBuilder samRecordSetBuilder = new SAMRecordSetBuilder(true, sortOrder);
  for (int i = 0; i < numPairs; i++) {
    int chr = 20;
    int start1 = (i + 1) * 1000;
    int start2 = start1 + 100;
    if (i == 5) { // add two unmapped fragments instead of a mapped pair
      samRecordSetBuilder.addFrag(String.format("test-read-%03d-1", i), chr, start1,
          false, true, null,
          null,
          -1, false);
      samRecordSetBuilder.addFrag(String.format("test-read-%03d-2", i), chr, start2,
          false, true, null,
          null,
          -1, false);
    } else {
      samRecordSetBuilder.addPair(String.format("test-read-%03d", i), chr, start1,
          start2);
    }
  }
  if (numPairs > 0) { // add two unplaced unmapped fragments if non-empty
    // Named with numPairs and numPairs + 1, without mutating the parameter.
    samRecordSetBuilder.addUnmappedFragment(String.format
        ("test-read-%03d-unplaced-unmapped", numPairs));
    samRecordSetBuilder.addUnmappedFragment(String.format
        ("test-read-%03d-unplaced-unmapped", numPairs + 1));
  }

  final File bamFile = File.createTempFile("test", ".bam");
  bamFile.deleteOnExit();
  SAMFileHeader samHeader = samRecordSetBuilder.getHeader();
  // try-with-resources closes the writer even if adding a record fails.
  try (SAMFileWriter bamWriter = new SAMFileWriterFactory()
      .makeSAMOrBAMWriter(samHeader, true, bamFile)) {
    for (final SAMRecord rec : samRecordSetBuilder.getRecords()) {
      bamWriter.addAlignment(rec);
    }
  }

  // create BAM index
  if (sortOrder.equals(SAMFileHeader.SortOrder.coordinate)) {
    final File indexFile = new File(bamFile.getAbsolutePath()
        .replaceFirst("\\.bam$", BAMIndex.BAMIndexSuffix));
    // The reader is only needed while indexing; close it so the file handle is not leaked.
    try (SamReader samReader = SamReaderFactory.makeDefault()
        .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS)
        .open(bamFile)) {
      BAMIndexer.createIndex(samReader, indexFile);
    }
  }

  return bamFile;
}