Java Code Examples for htsjdk.samtools.BAMIndex

The following examples show how to use htsjdk.samtools.BAMIndex. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Builds the chunk that covers the unmapped reads, if the caller asked for them.
 *
 * <p>An empty {@link Optional} is returned when the UNMAPPED option is not set, when the
 * index reports {@code -1} as the start of its last linear bin, when no content length
 * was supplied, or when the content length cannot be parsed as a {@code long}.
 *
 * @param bamIndex            index queried for the start of its last linear bin
 * @param contentLengthString content length of the BAM resource as text; may be {@code null}
 * @param cmd                 parsed command line, checked for the UNMAPPED option
 * @return a chunk from the start of the last linear bin to the content length shifted
 *         left by 16 bits, or empty in the cases listed above
 */
@NotNull
private static Optional<Chunk> getUnmappedChunk(@NotNull BAMIndex bamIndex, @Nullable String contentLengthString,
        @NotNull CommandLine cmd) {
    if (!cmd.hasOption(UNMAPPED)) {
        return Optional.empty();
    }

    final long unmappedStart = bamIndex.getStartOfLastLinearBin();
    if (unmappedStart == -1) {
        LOGGER.warn("Start of last linear bin was -1. No mapped reads found in BAM.");
        return Optional.empty();
    }
    if (contentLengthString == null) {
        return Optional.empty();
    }

    final long contentLength;
    try {
        contentLength = Long.parseLong(contentLengthString);
    } catch (NumberFormatException ignored) {
        LOGGER.error("Invalid content length ({}) for bam URL", contentLengthString);
        return Optional.empty();
    }

    // NOTE(review): the 16-bit left shift presumably turns the byte length into a BGZF-style
    // virtual file offset (compressed offset in the upper bits) - confirm against Chunk's contract.
    return Optional.of(new Chunk(unmappedStart, contentLength << 16));
}
 
Example 2
Source Project: picard   Source File: MergeSamFilesTest.java    License: MIT License 6 votes vote down vote up
/**
 * Confirm that unsorted input can result in coordinate sorted output, with index created.
 *
 * <p>Merges {@code 1.sam} and {@code 2.sam} from the {@code unsorted_input} test directory
 * with {@code CREATE_INDEX=true} and {@code SO=coordinate}, then checks the exit status,
 * the sort order in the merged header, that the reader reports an index, that the output
 * validates, and that the index file exists (by deleting it).
 *
 * @throws Exception if the temporary output cannot be created or the reader fails
 */
@Test
public void unsortedInputSortedOutputTest() throws Exception {
    final File unsortedInputTestDataDir = new File(TEST_DATA_DIR, "unsorted_input");
    final File mergedOutput = File.createTempFile("unsortedInputSortedOutputTest.", BamFileIoUtils.BAM_FILE_EXTENSION);
    mergedOutput.deleteOnExit();
    final File mergedOutputIndex = new File(mergedOutput.getParent(), IOUtil.basename(mergedOutput)+ BAMIndex.BAI_INDEX_SUFFIX);
    mergedOutputIndex.deleteOnExit();

    final String[] args = {
            "I=" + new File(unsortedInputTestDataDir, "1.sam").getAbsolutePath(),
            "I=" + new File(unsortedInputTestDataDir, "2.sam").getAbsolutePath(),
            "O=" + mergedOutput.getAbsolutePath(),
            "CREATE_INDEX=true",
            "SO=coordinate"
    };
    final int mergeExitStatus = runPicardCommandLine(args);
    Assert.assertEquals(mergeExitStatus, 0);

    // try-with-resources: the reader is released even when one of the assertions below fails.
    try (SamReader reader = SamReaderFactory.makeDefault().open(mergedOutput)) {
        Assert.assertEquals(reader.getFileHeader().getSortOrder(), SAMFileHeader.SortOrder.coordinate);
        Assert.assertTrue(reader.hasIndex());
        new ValidateSamTester().assertSamValid(mergedOutput);
        // delete() only returns true if the index file was actually there.
        Assert.assertTrue(mergedOutputIndex.delete());
    }
}
 
Example 3
Source Project: Hadoop-BAM   Source File: TestBAMInputFormat.java    License: MIT License 6 votes vote down vote up
/**
 * Checks split calculation with the BAI split calculator enabled when the index file is
 * named by appending the index suffix to the full BAM file name (rather than replacing
 * the {@code .bam} extension).
 *
 * <p>Writes a coordinate-sorted BAM of 1000 pairs, renames its index, caps the split size
 * at 40000 and expects three splits holding 1080, 524 and 398 records.
 *
 * @throws Exception if the test BAM cannot be written or the splits cannot be read
 */
@Test
public void testMultipleSplitsBaiEnabledSuffixPath() throws Exception {
  input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate)
      .getAbsolutePath();
  File index = new File(input.replaceFirst("\\.bam$", BAMIndex.BAMIndexSuffix));
  File suffixedIndex = new File(input + BAMIndex.BAMIndexSuffix);
  // renameTo reports failure only through its return value; fail loudly so the test
  // never silently runs against the un-renamed index.
  if (!index.renameTo(suffixedIndex)) {
    throw new AssertionError("Could not rename BAM index " + index + " to " + suffixedIndex);
  }
  completeSetup();
  BAMInputFormat.setEnableBAISplitCalculator(jobContext.getConfiguration(), true);
  jobContext.getConfiguration().setInt(FileInputFormat.SPLIT_MAXSIZE, 40000);
  BAMInputFormat inputFormat = new BAMInputFormat();
  List<InputSplit> splits = inputFormat.getSplits(jobContext);
  assertEquals(3, splits.size());
  List<SAMRecord> split0Records = getSAMRecordsFromSplit(inputFormat, splits.get(0));
  List<SAMRecord> split1Records = getSAMRecordsFromSplit(inputFormat, splits.get(1));
  List<SAMRecord> split2Records = getSAMRecordsFromSplit(inputFormat, splits.get(2));
  assertEquals(1080, split0Records.size());
  assertEquals(524, split1Records.size());
  assertEquals(398, split2Records.size());
}
 
Example 4
/**
 * Slices a remote BAM using a downloaded index and writes the result to the file named by
 * the OUTPUT option.
 *
 * <p>The index is downloaded to a local file (deleted on JVM exit). Records for the BED
 * regions, if any, are written first, followed by the unmapped reads, if requested. Both
 * readers, the writer and the index are released even when slicing fails part-way.
 *
 * @param indexUrl location of the index; a local path containing {@code .crai} is read as a CRAI index
 * @param bamUrl   location of the alignment file to slice
 * @param cmd      parsed command line supplying the slicing options and the output path
 * @throws IOException if the index cannot be downloaded or a reader fails to open or close
 */
private static void sliceFromURLs(@NotNull URL indexUrl, @NotNull URL bamUrl, @NotNull CommandLine cmd) throws IOException {
    File indexFile = downloadIndex(indexUrl);
    indexFile.deleteOnExit();

    try (SamReader reader = createFromCommandLine(cmd).open(SamInputResource.of(bamUrl).index(indexFile))) {
        BAMIndex bamIndex;
        if (indexFile.getPath().contains(".crai")) {
            SeekableStream craiIndex = CRAIIndex.openCraiFileAsBaiStream(indexFile, reader.getFileHeader().getSequenceDictionary());
            bamIndex = new DiskBasedBAMFileIndex(craiIndex, reader.getFileHeader().getSequenceDictionary());
        } else {
            bamIndex = new DiskBasedBAMFileIndex(indexFile, reader.getFileHeader().getSequenceDictionary(), false);
        }

        try {
            Optional<Pair<QueryInterval[], BAMFileSpan>> queryIntervalsAndSpan = queryIntervalsAndSpan(reader, bamIndex, cmd);
            Optional<Chunk> unmappedChunk = getUnmappedChunk(bamIndex, HttpUtils.getHeaderField(bamUrl, "Content-Length"), cmd);
            List<Chunk> sliceChunks = sliceChunks(queryIntervalsAndSpan, unmappedChunk);

            // Resources close in reverse order: the writer first, then the caching reader.
            try (SamReader cachingReader = createCachingReader(indexFile, bamUrl, cmd, sliceChunks);
                 SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true)
                         .makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)))) {

                queryIntervalsAndSpan.ifPresent(pair -> {
                    LOGGER.info("Slicing bam on bed regions...");
                    CloseableIterator<SAMRecord> bedIterator = getIterator(cachingReader, pair.getKey(), pair.getValue().toCoordinateArray());
                    writeToSlice(writer, bedIterator);
                    LOGGER.info("Done writing bed slices.");
                });

                unmappedChunk.ifPresent(chunk -> {
                    LOGGER.info("Slicing unmapped reads...");
                    CloseableIterator<SAMRecord> unmappedIterator = cachingReader.queryUnmapped();
                    writeToSlice(writer, unmappedIterator);
                    LOGGER.info("Done writing unmapped reads.");
                });
            }
        } finally {
            // The index holds the downloaded index file (or CRAI stream) open until closed.
            bamIndex.close();
        }
    }
}
 
Example 5
/**
 * Resolves the BED-defined query intervals and the file span that covers them.
 *
 * @param reader   reader whose file header is passed to the BED interval lookup
 * @param bamIndex index used to compute the file span for the intervals
 * @param cmd      parsed command line, checked for the BED option
 * @return the intervals paired with their file span, or empty when the BED option is not set
 * @throws IOException declared for the BED lookup; the visible code does not show where it originates
 */
@NotNull
private static Optional<Pair<QueryInterval[], BAMFileSpan>> queryIntervalsAndSpan(@NotNull SamReader reader, @NotNull BAMIndex bamIndex,
        @NotNull CommandLine cmd) throws IOException {
    if (!cmd.hasOption(BED)) {
        return Optional.empty();
    }

    final String bedFile = cmd.getOptionValue(BED);
    LOGGER.info("Reading query intervals from BED file: {}", bedFile);
    final QueryInterval[] queryIntervals = getIntervalsFromBED(bedFile, reader.getFileHeader());
    return Optional.of(Pair.of(queryIntervals, BAMFileReader.getFileSpan(queryIntervals, bamIndex)));
}
 
Example 6
Source Project: picard   Source File: BamIndexStats.java    License: MIT License 5 votes vote down vote up
/**
 * Runs the program: warns when INPUT looks like an index file rather than a BAM,
 * asserts that INPUT is readable, then hands it to
 * {@code BAMIndexMetaData.printIndexStats} to print the index meta data.
 *
 * @return always {@code 0}
 */
protected int doWork() {
    final boolean inputLooksLikeIndex = INPUT.getName().endsWith(BAMIndex.BAMIndexSuffix);
    if (inputLooksLikeIndex) {
        // Only a warning: processing continues with INPUT as given.
        log.warn("INPUT should be the BAM file name, not its index file");
    }

    IOUtil.assertFileIsReadable(INPUT);
    BAMIndexMetaData.printIndexStats(INPUT);
    return 0;
}
 
Example 7
Source Project: Hadoop-BAM   Source File: BAMInputFormat.java    License: MIT License 5 votes vote down vote up
/**
 * Returns the given splits without those that point at index files, i.e. splits whose
 * file name ends with {@code SplittingBAMIndexer.OUTPUT_FILE_EXTENSION} or
 * {@code BAMIndex.BAMIndexSuffix}.
 *
 * @param splits splits to filter; each element is cast to {@code FileSplit}
 * @return a new list holding only the non-index splits, in their original order
 */
static List<InputSplit> removeIndexFiles(List<InputSplit> splits) {
  return splits.stream()
      .filter(split -> {
        final String fileName = ((FileSplit) split).getPath().getName();
        final boolean isIndexFile = fileName.endsWith(SplittingBAMIndexer.OUTPUT_FILE_EXTENSION)
            || fileName.endsWith(BAMIndex.BAMIndexSuffix);
        return !isIndexFile;
      })
      .collect(Collectors.toList());
}
 
Example 8
Source Project: Hadoop-BAM   Source File: BAMInputFormat.java    License: MIT License 4 votes vote down vote up
/**
 * Derives the BAI index path for a BAM path by applying {@code BAMIndex.BAMIndexSuffix}
 * through {@code Path.suffix}.
 *
 * @param path path of the BAM file
 * @return the path with the BAM index suffix applied
 */
static Path getBAIPath(Path path) {
	final Path baiPath = path.suffix(BAMIndex.BAMIndexSuffix);
	return baiPath;
}
 
Example 9
Source Project: Hadoop-BAM   Source File: BAMTestUtil.java    License: MIT License 4 votes vote down vote up
/**
 * Writes a temporary BAM file of synthetic reads for tests and, for coordinate sort
 * order, a BAM index next to it.
 *
 * <p>One pair is added per index {@code i} in {@code [0, numPairs)} on contig 20, except
 * {@code i == 5}, which contributes two unmapped fragments instead. When
 * {@code numPairs > 0}, two unplaced unmapped fragments are appended. The BAM file is
 * deleted on JVM exit.
 *
 * @param numPairs  number of read pairs to generate
 * @param sortOrder sort order given to the record builder; an index is created only for {@code coordinate}
 * @return the temporary BAM file
 * @throws IOException if the temporary file cannot be created or the reader cannot be closed
 */
public static File writeBamFile(int numPairs, SAMFileHeader.SortOrder sortOrder)
    throws IOException {
  // file will be both queryname and coordinate sorted, so use one or the other
  SAMRecordSetBuilder samRecordSetBuilder = new SAMRecordSetBuilder(true, sortOrder);
  for (int i = 0; i < numPairs; i++) {
    int chr = 20;
    int start1 = (i + 1) * 1000;
    int start2 = start1 + 100;
    if (i == 5) { // add two unmapped fragments instead of a mapped pair
      samRecordSetBuilder.addFrag(String.format("test-read-%03d-1", i), chr, start1,
          false, true, null,
          null,
          -1, false);
      samRecordSetBuilder.addFrag(String.format("test-read-%03d-2", i), chr, start2,
          false, true, null,
          null,
          -1, false);
    } else {
      samRecordSetBuilder.addPair(String.format("test-read-%03d", i), chr, start1,
          start2);
    }
  }
  if (numPairs > 0) { // add two unplaced unmapped fragments if non-empty
    // Numbered numPairs and numPairs + 1, continuing after the pair indices.
    samRecordSetBuilder.addUnmappedFragment(String.format
        ("test-read-%03d-unplaced-unmapped", numPairs));
    samRecordSetBuilder.addUnmappedFragment(String.format
        ("test-read-%03d-unplaced-unmapped", numPairs + 1));
  }

  final File bamFile = File.createTempFile("test", ".bam");
  bamFile.deleteOnExit();
  SAMFileHeader samHeader = samRecordSetBuilder.getHeader();
  // try-with-resources: the writer is closed even if adding a record fails.
  try (SAMFileWriter bamWriter = new SAMFileWriterFactory()
      .makeSAMOrBAMWriter(samHeader, true, bamFile)) {
    for (final SAMRecord rec : samRecordSetBuilder.getRecords()) {
      bamWriter.addAlignment(rec);
    }
  }

  // create BAM index
  if (sortOrder.equals(SAMFileHeader.SortOrder.coordinate)) {
    final File indexFile = new File(bamFile.getAbsolutePath()
        .replaceFirst("\\.bam$", BAMIndex.BAMIndexSuffix));
    // The reader is only needed while indexing; close it so the file handle is not leaked.
    try (SamReader samReader = SamReaderFactory.makeDefault()
        .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS)
        .open(bamFile)) {
      BAMIndexer.createIndex(samReader, indexFile);
    }
  }

  return bamFile;
}