htsjdk.samtools.SAMRecordIterator Java Examples

The following examples show how to use htsjdk.samtools.SAMRecordIterator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: MapBarcodesByEditDistanceTest.java    From Drop-seq with MIT License 6 votes vote down vote up
private UMISharingData readUMISharingData (File f, String cellBarcode, ParentEditDistanceMatcher matcher) {
	Map<String, Set<TagValues>> umisPerBarcode = new HashMap<String, Set<TagValues>>();
	ObjectCounter<String> barcodes = new ObjectCounter<>();
	
	SamReader reader = SamReaderFactory.makeDefault().open(UmiSharingData);
	SAMRecordIterator iter = reader.iterator();
	while (iter.hasNext()) {
		SAMRecord r = iter.next();
		String barcode = r.getStringAttribute(cellBarcode);
		TagValues v = matcher.getValues(r);
		Set<TagValues> s = umisPerBarcode.get(barcode);
		if (s==null) {
			s = new HashSet<TagValues>();
			umisPerBarcode.put(barcode, s);
		}			
		if (!s.contains(v))
			barcodes.increment(barcode);
		s.add(v);
	}
	UMISharingData result = new UMISharingData();
	result.barcodes=barcodes;
	result.umisPerBarcode=umisPerBarcode;
	return result;
}
 
Example #2
Source File: BamSlicer.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
public void slice(@NotNull final SamReader samReader, final List<GenomeRegion> regions, @NotNull final Consumer<SAMRecord> consumer)
{
    mConsumerHalt = false;

    final QueryInterval[] queryIntervals = createIntervals(regions, samReader.getFileHeader());

    try (final SAMRecordIterator iterator = samReader.queryOverlapping(queryIntervals))
    {
        while (!mConsumerHalt && iterator.hasNext())
        {
            final SAMRecord record = iterator.next();

            if (passesFilters(record))
            {
                consumer.accept(record);
            }
        }
    }
}
 
Example #3
Source File: BamSlicer.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
public void slice(@NotNull final SamReader samReader, final QueryInterval[] queryIntervals, @NotNull final Consumer<SAMRecord> consumer)
{
    mConsumerHalt = false;

    try (final SAMRecordIterator iterator = samReader.queryOverlapping(queryIntervals))
    {
        while (!mConsumerHalt && iterator.hasNext())
        {
            final SAMRecord record = iterator.next();

            if (passesFilters(record))
            {
                consumer.accept(record);
            }
        }
    }
}
 
Example #4
Source File: BamSlicer.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
public List<SAMRecord> slice(@NotNull final SamReader samReader, final QueryInterval[] queryIntervals)
{
    final List<SAMRecord> records = Lists.newArrayList();

    try (final SAMRecordIterator iterator = samReader.queryOverlapping(queryIntervals))
    {
        while (iterator.hasNext())
        {
            final SAMRecord record = iterator.next();

            if (passesFilters(record))
            {
                records.add(record);
            }
        }
    }

    return records;
}
 
Example #5
Source File: TestUtils.java    From Drop-seq with MIT License 6 votes vote down vote up
public static void assertSamFilesSame(final File actual, final File expected) {
	final SamReader expectedReader = SamReaderFactory.makeDefault().open(expected);
	final SamReader actualReader = SamReaderFactory.makeDefault().open(actual);
	Assert.assertEquals(expectedReader.getFileHeader(), actualReader.getFileHeader());
	final SAMRecordIterator expectedIterator = expectedReader.iterator();
	final SAMRecordIterator actualIterator = actualReader.iterator();
	while (expectedIterator.hasNext()) {
		if (!actualIterator.hasNext()) {
			Assert.fail(String.format("%s has more records than %s", expected, actual));
		} else {
			Assert.assertEquals(actualIterator.next(), expectedIterator.next());
		}
	}
	if (actualIterator.hasNext()) {
		Assert.fail(String.format("%s has fewer records than %s", expected, actual));
	}
}
 
Example #6
Source File: FastqToSamTest.java    From picard with MIT License 6 votes vote down vote up
private void convertFileAndVerifyRecordCount(final int expectedCount,
                   final String fastqFilename1,
                   final String fastqFilename2,
                   final FastqQualityFormat version,
                   final boolean permissiveFormat,
                   final boolean useSequentialFastqs) throws IOException {
    final File samFile = convertFile(fastqFilename1, fastqFilename2, version, permissiveFormat, useSequentialFastqs);
    final SamReader samReader = SamReaderFactory.makeDefault().open(samFile);
    final SAMRecordIterator iterator = samReader.iterator();
    int actualCount = 0;
    while (iterator.hasNext()) {
        iterator.next();
        actualCount++;
    }
    samReader.close();
    Assert.assertEquals(actualCount, expectedCount);
}
 
Example #7
Source File: MergeBamAlignmentTest.java    From picard with MIT License 6 votes vote down vote up
@Test(dataProvider="data")
public void testSortingOnSamAlignmentMerger(final File unmapped, final File aligned, final boolean sorted, final boolean coordinateSorted, final String testName)
    throws IOException {

    final File target = File.createTempFile("target", "bam");
    target.deleteOnExit();
    final SamAlignmentMerger merger = new SamAlignmentMerger(unmapped,  target, fasta, null, true, false,
            false, Collections.singletonList(aligned), 1, null, null, null, null, null, null,
            Collections.singletonList(SamPairUtil.PairOrientation.FR),
            coordinateSorted ? SAMFileHeader.SortOrder.coordinate : SAMFileHeader.SortOrder.queryname,
            new BestMapqPrimaryAlignmentSelectionStrategy(), false, false, 30);

    merger.mergeAlignment(Defaults.REFERENCE_FASTA);
    Assert.assertEquals(sorted, !merger.getForceSort());
    final SAMRecordIterator it = SamReaderFactory.makeDefault().open(target).iterator();
    int aln = 0;
    while (it.hasNext()) {
        final SAMRecord rec = it.next();
        if (!rec.getReadUnmappedFlag()) {
            aln++;
        }
    }
    Assert.assertEquals(aln, 6, "Incorrect number of aligned reads in merged BAM file");
}
 
Example #8
Source File: SetNmMdAndUqTagsTest.java    From picard with MIT License 5 votes vote down vote up
private void validateUq(final File input, final File reference) {
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(reference).open(input);
    final SAMRecordIterator iterator = reader.iterator();
    while (iterator.hasNext()){
        SAMRecord rec = iterator.next();
        if (!rec.getReadUnmappedFlag())
            Assert.assertNotNull(rec.getAttribute("UQ"));
    }
}
 
Example #9
Source File: BamOverlapChecker.java    From systemsgenetics with GNU General Public License v3.0 5 votes vote down vote up
public BamOverlapChecker(SamReader bam_file){
    
    SAMFileHeader  header = bam_file.getFileHeader();
    SAMSequenceDictionary dict = header.getSequenceDictionary();
    List<SAMSequenceRecord> sequences = dict.getSequences();
   
    
    booleanMap = new HashMap<String, boolean[]>();
    
    for(SAMSequenceRecord sequence : sequences){
        int sequenceEnd = sequence.getSequenceLength();
        int arrayLength = (int) Math.ceil( (float) sequenceEnd / (float)stepSize );
        boolean[] tempArray;
        tempArray = new boolean[arrayLength];
        
        for(int i=0;i<arrayLength;i++){
            SAMRecordIterator bamQuery = bam_file.queryOverlapping(sequence.getSequenceName(), i*stepSize, (i+1)*stepSize);
            if(bamQuery.hasNext()){
                tempArray[i] = true;
            }else{
                tempArray[i] = false;
            }
            bamQuery.close();
        }
        booleanMap.put(sequence.getSequenceName(),tempArray );
        //System.out.println("Finished checking the bam for chromosome " + sequence.getSequenceName());
    }
    
}
 
Example #10
Source File: BAMIOITCase.java    From dataflow-java with Apache License 2.0 5 votes vote down vote up
@Test
public void openBAMTest() throws IOException {
 GCSOptions popts = PipelineOptionsFactory.create().as(GCSOptions.class);
 final Storage.Objects storageClient = Transport.newStorageClient(popts).build().objects();

 SamReader samReader = BAMIO.openBAM(storageClient, TEST_BAM_FNAME, ValidationStringency.DEFAULT_STRINGENCY);
 SAMRecordIterator iterator =  samReader.query("1", 550000, 560000, false);
 int readCount = 0;
 while (iterator.hasNext()) {
     iterator.next();
     readCount++;
 }
 Assert.assertEquals("Unexpected count of unmapped reads",
	  EXPECTED_UNMAPPED_READS_COUNT, readCount);
}
 
Example #11
Source File: HeaderInfo.java    From dataflow-java with Apache License 2.0 5 votes vote down vote up
public static HeaderInfo getHeaderFromBAMFile(Storage.Objects storage, String BAMPath, Iterable<Contig> explicitlyRequestedContigs) throws IOException {
  HeaderInfo result = null;

  // Open and read start of BAM
  LOG.info("Reading header from " + BAMPath);
  final SamReader samReader = BAMIO
      .openBAM(storage, BAMPath, ValidationStringency.DEFAULT_STRINGENCY);
  final SAMFileHeader header = samReader.getFileHeader();
  Contig firstContig = getFirstExplicitContigOrNull(header, explicitlyRequestedContigs);
  if (firstContig == null) {
    final SAMSequenceRecord seqRecord = header.getSequence(0);
    firstContig = new Contig(seqRecord.getSequenceName(), -1, -1);
  }

  LOG.info("Reading first chunk of reads from " + BAMPath);
  final SAMRecordIterator recordIterator = samReader.query(
      firstContig.referenceName, (int)firstContig.start + 1, (int)firstContig.end + 1, false);

  Contig firstShard = null;
  while (recordIterator.hasNext() && result == null) {
    SAMRecord record = recordIterator.next();
    final int alignmentStart = record.getAlignmentStart();
    if (firstShard == null && alignmentStart > firstContig.start &&
        (alignmentStart < firstContig.end || firstContig.end == -1)) {
      firstShard = new Contig(firstContig.referenceName, alignmentStart, alignmentStart);
      LOG.info("Determined first shard to be " + firstShard);
      result = new HeaderInfo(header, firstShard);
    }
  }
  recordIterator.close();
  samReader.close();

  if (result == null) {
    throw new IOException("Did not find reads for the first contig " + firstContig.toString());
  }
  LOG.info("Finished header reading from " + BAMPath);
  return result;
}
 
Example #12
Source File: BAMDiff.java    From dataflow-java with Apache License 2.0 5 votes vote down vote up
boolean compareRecords(SAMRecordIterator it1, SAMRecordIterator it2) throws Exception {
  PeekIterator<SAMRecord> pit1 = new PeekIterator<SAMRecord>(it1);
  PeekIterator<SAMRecord> pit2 = new PeekIterator<SAMRecord>(it2);

  do {
    SameCoordReadSet reads1 = getSameCoordReads(pit1, BAMFile1);
    SameCoordReadSet reads2 = getSameCoordReads(pit2, BAMFile2);

    if (reads1 == null) {
      if (reads2 == null) {
        return true;
      } else {
        error(BAMFile1 + " reads exhausted but there are still reads at " +
            reads2.reference + ":" + reads2.coord + " in " + BAMFile2);
        return false;
      }
    } else {
      if (reads2 == null) {
        error(BAMFile2 + " reads exhausted but there are still reads at " +
            reads1.reference + ":" + reads1.coord + " in " + BAMFile1);
        return false;
      } else {
        processedLoci++;
        if (!compareSameCoordReads(reads1, reads2)) {
          return false;
        }
        LOG.fine("Same reads at " + reads1.reference + ":" + reads1.coord);
      }
    }
    if (processedLoci % 100000000 == 0) {
      LOG.info("Working..., processed " +
          processedLoci + " loci, " + processedReads + " reads.");
    }
  } while(true);
}
 
Example #13
Source File: CleanSamTester.java    From picard with MIT License 5 votes vote down vote up
protected void test() {
    try {
        final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000);

        // Validate it has the expected cigar
        validator.setIgnoreWarnings(true);
        validator.setVerbose(true, 1000);
        validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP));
        SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
        SamReader samReader = factory.open(getOutput());
        final SAMRecordIterator iterator = samReader.iterator();
        while (iterator.hasNext()) {
            final SAMRecord rec = iterator.next();
            Assert.assertEquals(rec.getCigarString(), expectedCigar);
            if (SAMUtils.hasMateCigar(rec)) {
                Assert.assertEquals(SAMUtils.getMateCigarString(rec), expectedCigar);
            }
        }
        CloserUtil.close(samReader);

        // Run validation on the output file
        samReader = factory.open(getOutput());
        final boolean validated = validator.validateSamFileVerbose(samReader, null);
        CloserUtil.close(samReader);

        Assert.assertTrue(validated, "ValidateSamFile failed");
    } finally {
        IOUtil.recursiveDelete(getOutputDir().toPath());
    }
}
 
Example #14
Source File: MNVValidator.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@NotNull
private SAMRecordIterator queryBam(@NotNull final PotentialMNVRegion potentialMnvRegion) {
    final int referenceIndex = getReferenceIndex(potentialMnvRegion);
    final QueryInterval[] queryIntervals =
            new QueryInterval[] { new QueryInterval(referenceIndex, potentialMnvRegion.start(), potentialMnvRegion.end() - 1) };
    return tumorReader().queryOverlapping(queryIntervals);
}
 
Example #15
Source File: MNVValidator.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@NotNull
public List<VariantContext> mergeVariants(@NotNull final PotentialMNVRegion potentialMnvRegion, @NotNull final MNVMerger merger) {
    if (potentialMnvRegion.potentialMnvs().size() == 0) {
        return potentialMnvRegion.variants();
    } else {
        final SAMRecordIterator samIterator = queryBam(potentialMnvRegion);
        final MNVRegionValidator regionValidator = validateMNVs(samIterator, potentialMnvRegion);
        samIterator.close();
        return outputVariants(regionValidator, merger);
    }
}
 
Example #16
Source File: ChromosomeReadCount.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@Override
public ChromosomeReadCount call() throws Exception {
    LOGGER.info("Generating windows on chromosome {}", chromosome);

    try (final SamReader reader = readerFactory.open(inputFile)) {
        final SAMRecordIterator iterator = reader.query(chromosome, 0, 0, true);
        while (iterator.hasNext()) {
            addRecord(iterator.next());
        }
    }
    return this;
}
 
Example #17
Source File: SamSlicer.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
public void slice(@NotNull final SamReader samReader, @NotNull final Consumer<SAMRecord> consumer) {
    final QueryInterval[] queryIntervals = createIntervals(regions, samReader.getFileHeader());

    try (final SAMRecordIterator iterator = samReader.queryOverlapping(queryIntervals)) {
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            if (samRecordMeetsQualityRequirements(record)) {
                consumer.accept(record);
            }
        }
    }
}
 
Example #18
Source File: SAMSlicer.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
public void slice(@NotNull final SamReader samReader, @NotNull final Consumer<SAMRecord> consumer) {
    final Set<String> processed = Sets.newHashSet();
    final QueryInterval[] queryIntervals = createIntervals(regions, samReader.getFileHeader());

    try (final SAMRecordIterator iterator = samReader.queryOverlapping(queryIntervals)) {
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            if (samRecordMeetsQualityRequirements(record)) {
                if (processed.add(record.toString())) {
                    consumer.accept(record);
                }
            }
        }
    }
}
 
Example #19
Source File: TrimStartingSequenceTest.java    From Drop-seq with MIT License 5 votes vote down vote up
@Test
public void testTrimStartingSequenceClp() throws IOException {
    final TrimStartingSequence clp = new TrimStartingSequence();
    clp.INPUT = INPUT;
    clp.OUTPUT = File.createTempFile("TrimStartingSequenceTest.", ".sam");
    clp.OUTPUT.deleteOnExit();
    clp.OUTPUT_SUMMARY = File.createTempFile("TrimStartingSequenceTest.", ".summary");
    clp.OUTPUT_SUMMARY.deleteOnExit();
    clp.SEQUENCE = "AAGCAGTGGTATCAACGCAGAGTGAATGGG";
    clp.MISMATCHES=0;
    clp.NUM_BASES=5;
    Assert.assertEquals(clp.doWork(), 0);

    // Confirm that the expected stuff is trimmed.
    final SamReader outputReader = SamReaderFactory.makeDefault().open(clp.OUTPUT);
    final SAMRecordIterator outputIterator = outputReader.iterator();
    final SamReader inputReader = SamReaderFactory.makeDefault().open(clp.INPUT);
    for (final SAMRecord inputRec: inputReader) {
        Assert.assertTrue(outputIterator.hasNext());
        final SAMRecord outputRec = outputIterator.next();
        final String inputBases = inputRec.getReadString();
        final String outputBases = outputRec.getReadString();
        Assert.assertTrue(inputBases.endsWith(outputBases));
        final String trimmedBases = inputBases.substring(0, inputBases.length() - outputBases.length());
        Assert.assertTrue(clp.SEQUENCE.endsWith(trimmedBases));
    }
    Assert.assertFalse(outputIterator.hasNext());
    CloserUtil.close(Arrays.asList(outputReader, inputReader));
}
 
Example #20
Source File: WriteBAIFn.java    From dataflow-java with Apache License 2.0 4 votes vote down vote up
@ProcessElement
public void processElement(DoFn<String, String>.ProcessContext c) throws Exception {
  Metrics.counter(WriteBAIFn.class, "Initialized Indexing Shard Count").inc();
  Stopwatch stopWatch = Stopwatch.createStarted();

  final HeaderInfo header = c.sideInput(headerView);
  final String bamFilePath = c.sideInput(writtenBAMFilerView);
  final Iterable<KV<Integer, Long>> sequenceShardSizes = c.sideInput(sequenceShardSizesView);

  final String sequenceName = c.element();
  final int sequenceIndex = header.header.getSequence(sequenceName).getSequenceIndex();
  final String baiFilePath = bamFilePath + "-" +
      String.format("%02d",sequenceIndex) + "-" + sequenceName + ".bai";

  long offset = 0;
  int skippedReferences  = 0;
  long bytesToProcess = 0;

  for (KV<Integer, Long> sequenceShardSize : sequenceShardSizes) {
    if (sequenceShardSize.getKey() < sequenceIndex) {
      offset += sequenceShardSize.getValue();
      skippedReferences++;
    } else if (sequenceShardSize.getKey() == sequenceIndex) {
      bytesToProcess = sequenceShardSize.getValue();
    }
  }
  LOG.info("Generating BAI index: " + baiFilePath);
  LOG.info("Reading BAM file: " + bamFilePath + " for reference " + sequenceName +
      ", skipping " + skippedReferences + " references at offset " + offset +
      ", expecting to process " + bytesToProcess + " bytes");

  Options options = c.getPipelineOptions().as(Options.class);
  final Storage.Objects storage = Transport.newStorageClient(
      options
        .as(GCSOptions.class))
        .build()
        .objects();
  final SamReader reader = BAMIO.openBAM(storage, bamFilePath, ValidationStringency.SILENT, true,
      offset);
  final OutputStream outputStream =
      Channels.newOutputStream(
          new GcsUtil.GcsUtilFactory().create(options)
            .create(GcsPath.fromUri(baiFilePath),
                BAMIO.BAM_INDEX_FILE_MIME_TYPE));
  final BAMShardIndexer indexer = new BAMShardIndexer(outputStream, reader.getFileHeader(), sequenceIndex);

  long processedReads = 0;
  long skippedReads = 0;

  // create and write the content
  if (bytesToProcess > 0) {
    SAMRecordIterator it = reader.iterator();
    boolean foundRecords = false;
    while (it.hasNext()) {
      SAMRecord r = it.next();
      if (!r.getReferenceName().equals(sequenceName)) {
        if (foundRecords) {
          LOG.info("Finishing index building for " + sequenceName + " after processing " + processedReads);
          break;
        }
        skippedReads++;
        continue;
      } else if (!foundRecords) {
        LOG.info("Found records for refrence " + sequenceName + " after skipping " + skippedReads);
        foundRecords = true;
      }
      indexer.processAlignment(r);
      processedReads++;
    }
    it.close();
  } else {
    LOG.info("No records for refrence " + sequenceName + ": writing empty index ");
  }
  long noCoordinateReads = indexer.finish();
  c.output(baiFilePath);
  c.output(NO_COORD_READS_COUNT_TAG, noCoordinateReads);
  LOG.info("Generated " + baiFilePath + ", " + processedReads + " reads, " +
      noCoordinateReads + " no coordinate reads, " + skippedReads + ", skipped reads");
  stopWatch.stop();
  Metrics.distribution(WriteBAIFn.class, "Indexing Shard Processing Time (sec)").update(
      stopWatch.elapsed(TimeUnit.SECONDS));
  Metrics.counter(WriteBAIFn.class, "Finished Indexing Shard Count").inc();
  Metrics.counter(WriteBAIFn.class, "Indexed reads").inc(processedReads);
  Metrics.counter(WriteBAIFn.class, "Indexed no coordinate reads").inc(noCoordinateReads);
  Metrics.distribution(WriteBAIFn.class, "Reads Per Indexing Shard").update(processedReads);
}
 
Example #21
Source File: TestBAMRecordView.java    From cramtools with Apache License 2.0 4 votes vote down vote up
@Test
public void test() throws IOException {
	byte[] buf = new byte[1024];
	BAMRecordView view = new BAMRecordView(buf);
	view.setRefID(0);
	view.setAlignmentStart(77);
	view.setMappingScore(44);
	view.setIndexBin(99);
	view.setFlags(555);
	view.setMateRefID(0);
	view.setMateAlStart(78);
	view.setInsertSize(133);

	view.setReadName("name1");
	view.setCigar(TextCigarCodec.decode("10M"));
	view.setBases("AAAAAAAAAA".getBytes());
	view.setQualityScores("BBBBBBBBBB".getBytes());

	int id = 'A' << 16 | 'M' << 8 | 'A';
	view.addTag(id, "Q".getBytes(), 0, 1);

	int len = view.finish();

	System.out.println(Arrays.toString(Arrays.copyOf(buf, len)));

	ByteArrayOutputStream baos = new ByteArrayOutputStream();

	SAMFileHeader header = new SAMFileHeader();
	header.addSequence(new SAMSequenceRecord("14", 14));

	ByteArrayOutputStream baos2 = new ByteArrayOutputStream();
	SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(header, true, baos2);
	SAMRecord record = new SAMRecord(header);
	record.setReferenceIndex(0);
	record.setAlignmentStart(1);
	record.setCigarString("10M");
	record.setFlags(555);
	record.setMappingQuality(44);
	record.setMateReferenceIndex(0);
	record.setMateAlignmentStart(0);
	record.setInferredInsertSize(133);
	record.setReadName("name1");
	record.setReadBases("AAAAAAAAAA".getBytes());
	record.setBaseQualities("BBBBBBBBBB".getBytes());
	record.setAttribute("AM", 'Q');

	System.out.println("BAMFileWriter.addAlignment():");
	writer.addAlignment(record);
	System.out.println(".");
	writer.close();

	System.out.println("------------------------------------------");
	System.out.println();
	System.out.println(new String(baos2.toByteArray()));
	System.out.println();

	SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
	SAMFileReader reader2 = new SAMFileReader(new ByteArrayInputStream(baos2.toByteArray()));
	SAMRecordIterator iterator = reader2.iterator();
	while (iterator.hasNext()) {
		record = iterator.next();
		System.out.println(record.getSAMString());
	}
	System.out.println("------------------------------------------");

	BlockCompressedOutputStream bcos = new BlockCompressedOutputStream(baos, null);
	bcos.write("BAM\1".getBytes());
	bcos.write(toByteArray(header));
	CramInt.writeInt32(header.getSequenceDictionary().size(), bcos);
	for (final SAMSequenceRecord sequenceRecord : header.getSequenceDictionary().getSequences()) {
		byte[] bytes = sequenceRecord.getSequenceName().getBytes();
		CramInt.writeInt32(bytes.length + 1, bcos);
		bcos.write(sequenceRecord.getSequenceName().getBytes());
		bcos.write(0);
		CramInt.writeInt32(sequenceRecord.getSequenceLength(), bcos);
	}
	bcos.write(buf, 0, len);
	bcos.close();

	System.out.println(new String(baos.toByteArray()));

	SAMFileReader reader = new SAMFileReader(new ByteArrayInputStream(baos.toByteArray()));
	iterator = reader.iterator();
	while (iterator.hasNext()) {
		record = iterator.next();
		System.out.println(record.getSAMString());
	}
	reader.close();

}
 
Example #22
Source File: Transcode.java    From cramtools with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) throws IOException {
	Params params = new Params();
	JCommander jc = new JCommander(params);
	jc.parse(args);

	Log.setGlobalLogLevel(params.logLevel);

	if (args.length == 0 || params.help) {
		usage(jc);
		System.exit(1);
	}

	if (params.reference == null) {
		System.out.println("Reference file not found, will try downloading...");
	}

	ReferenceSource referenceSource = null;
	if (params.reference != null) {
		System.setProperty("reference", params.reference.getAbsolutePath());
		referenceSource = new ReferenceSource(params.reference);
	} else {
		String prop = System.getProperty("reference");
		if (prop != null) {
			referenceSource = new ReferenceSource(new File(prop));
		}
	}

	SamReaderFactory factory = SamReaderFactory.make().validationStringency(params.validationLevel);
	SamInputResource r;
	if ("file".equalsIgnoreCase(params.url.getProtocol()))
		r = SamInputResource.of(params.url.getPath());
	else
		r = SamInputResource.of(params.url);
	SamReader reader = factory.open(r);
	SAMRecordIterator iterator = reader.iterator();

	SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
	SAMFileWriter writer = null;
	OutputStream os = new BufferedOutputStream(new FileOutputStream(params.outputFile));
	switch (params.outputFormat) {
	case BAM:
		writer = writerFactory.makeBAMWriter(reader.getFileHeader(),
				reader.getFileHeader().getSortOrder() == SortOrder.coordinate, os);
		break;
	case CRAM:
		writer = writerFactory.makeCRAMWriter(reader.getFileHeader(), os, params.reference);
		break;

	default:
		System.out.println("Unknown output format: " + params.outputFormat);
		System.exit(1);
	}

	while (iterator.hasNext()) {
		writer.addAlignment(iterator.next());
	}
	writer.close();
	reader.close();
}
 
Example #23
Source File: Cram2Fastq.java    From cramtools with Apache License 2.0 4 votes vote down vote up
@Override
public void doRun() throws IOException {
	super.doRun();

	fo.close();

	if (fo.empty)
		return;

	log.info("Sorting overflow BAM: ", fo.file.length());
	SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
	SAMFileReader r = new SAMFileReader(fo.file);
	SAMRecordIterator iterator = r.iterator();
	if (!iterator.hasNext()) {
		r.close();
		fo.file.delete();
		return;
	}

	SAMRecord r1 = iterator.next();
	SAMRecord r2 = null;
	counter = multiFastqOutputter.getCounter();
	log.info("Counter=" + counter);
	while (!brokenPipe.get() && iterator.hasNext()) {
		r2 = iterator.next();
		if (r1.getReadName().equals(r2.getReadName())) {
			print(r1, r2);
			counter++;
			r1 = null;
			if (!iterator.hasNext())
				break;
			r1 = iterator.next();
			r2 = null;
		} else {
			print(r1, 0);
			r1 = r2;
			r2 = null;
			counter++;
		}
	}
	if (r1 != null)
		print(r1, 0);
	r.close();
	fo.file.delete();
}
 
Example #24
Source File: BoundSAMRecordIterator.java    From cramtools with Apache License 2.0 4 votes vote down vote up
@Override
public SAMRecordIterator assertSorted(SAMFileHeader.SortOrder sortOrder) {
	return delegate.assertSorted(sortOrder);
}
 
Example #25
Source File: BoundSAMRecordIterator.java    From cramtools with Apache License 2.0 4 votes vote down vote up
public BoundSAMRecordIterator(SAMRecordIterator delegate, long firstRecord, long lastRecord) {
	this.delegate = delegate;
	this.firstRecord = firstRecord;
	this.lastRecord = lastRecord;
}
 
Example #26
Source File: Merge.java    From cramtools with Apache License 2.0 4 votes vote down vote up
@Override
public SAMRecordIterator assertSorted(SAMFileHeader.SortOrder sortOrder) {
	// TODO Auto-generated method stub
	return null;
}
 
Example #27
Source File: Merge.java    From cramtools with Apache License 2.0 4 votes vote down vote up
public RecordSource(String id, SAMRecordIterator it) {
	this.id = id;
	this.it = it;
}
 
Example #28
Source File: ReadGenoAndAsFromIndividual.java    From systemsgenetics with GNU General Public License v3.0 4 votes vote down vote up
public static String get_allele_specific_overlap_at_snp(GeneticVariant this_variant,
                                                        int sample_index, 
                                                        String chromosome,
                                                        String position, 
                                                        SamReader bam_file){
    
    int pos_int = Integer.parseInt(position);
    
    Alleles all_variants = this_variant.getVariantAlleles();
    Character ref_allele_char = all_variants.getAllelesAsChars()[0];
    String ref_allele = ref_allele_char.toString(); 
    //System.out.println("ref_allele: " + ref_allele);
    Character alt_allele_char = all_variants.getAllelesAsChars()[1];
    String alt_allele = alt_allele_char.toString(); 
    //System.out.println("alt_allele: " + alt_allele);
   
    
    
    int ref_overlap = 0;
    int alt_overlap = 0;
    int no__overlap = 0;
    
    

   
    // now determine per individual the sample variants.
    // I'm assuming the ordering is the same as the individual names created 
    // by the  getSampleNames() method. 
    // Otherwise the data will be nicely permuted, and I will have to convert some stuff.
    
    int position_of_snp = Integer.parseInt(position);                       
    
    //Check to make sure the variant position is not 0.
    if(position_of_snp <= 0){
        System.out.println("A SNP was read with a position lower than 1. This is illegal");
        System.out.println("Please adapt your genotype files by removing SNPs with these illegal positions");
        System.out.println("\tchr: " + chromosome + " pos: " + position);
        throw new IllegalDataException("Variant Position was less than 1");
    }
    
    SAMRecordIterator all_reads_in_region;
    try{
        all_reads_in_region = bam_file.queryOverlapping(chromosome, position_of_snp, position_of_snp);
    } catch(IllegalArgumentException e){
        System.out.println("Found an error when trying the following input:");
        System.out.println("chr:\t"+chromosome);
        System.out.println("pos:\t"+ position);
        System.out.println("If these values look correct, please make sure your bam file is sorted AND indexed by samtools.");
        System.out.println("If the problem persists, perhaps the chromosome (or sequence) are not the same in the genotype or bam file");
        all_reads_in_region = null;
        System.exit(0);
        
    }
    
    String bases = "";
    
    while(all_reads_in_region.hasNext()){
        
        SAMRecord read_in_region = all_reads_in_region.next();
        
        Character base_in_read = get_base_at_position(read_in_region, pos_int);
        if(GlobalVariables.verbosity >= 100){
            System.out.println("base_in_read: " + base_in_read);
        }
        
        if( base_in_read == ref_allele.charAt(0) ){
            ref_overlap++;            
        } else if(base_in_read == alt_allele.charAt(0) ){
            alt_overlap++;
        }else if(base_in_read == '!'  || base_in_read == 'N'){
            continue;
        }else{
            no__overlap++;
        }
    }
    
    //This line below cost me a day to figure out the error.
    all_reads_in_region.close();
    
    String string_for_output;
    string_for_output = Integer.toString(ref_overlap) + "\t" + 
                        Integer.toString(alt_overlap) + "\t" + 
                        Integer.toString(no__overlap);
    
    return(string_for_output);
}
 
Example #29
Source File: ReorderSam.java    From picard with MIT License 4 votes vote down vote up
/**
 * Helper function that writes reads from iterator it into writer out, updating each SAMRecord along the way
 * according to the newOrder mapping from dictionary index -> index.  Name is used for printing only.
 */
private void writeReads(final SAMFileWriter out,
                        final SAMRecordIterator it,
                        final Map<Integer, Integer> newOrder,
                        final String name) {
    long counter = 0;
    log.info("  Processing " + name);

    while (it.hasNext()) {
        counter++;
        final SAMRecord read = it.next();
        final int oldRefIndex = read.getReferenceIndex();
        final int oldMateIndex = read.getMateReferenceIndex();
        final int newRefIndex = newOrderIndex(read, oldRefIndex, newOrder);

        read.setHeader(out.getFileHeader());
        read.setReferenceIndex(newRefIndex);

        // read becoming unmapped
        if (oldRefIndex != NO_ALIGNMENT_REFERENCE_INDEX &&
                newRefIndex == NO_ALIGNMENT_REFERENCE_INDEX) {
            read.setAlignmentStart(NO_ALIGNMENT_START);
            read.setReadUnmappedFlag(true);
            read.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR);
            read.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY);
        }

        final int newMateIndex = newOrderIndex(read, oldMateIndex, newOrder);
        if (oldMateIndex != NO_ALIGNMENT_REFERENCE_INDEX &&
                newMateIndex == NO_ALIGNMENT_REFERENCE_INDEX) { // mate becoming unmapped
            read.setMateAlignmentStart(NO_ALIGNMENT_START);
            read.setMateUnmappedFlag(true);
            read.setAttribute(SAMTag.MC.name(), null);      // Set the Mate Cigar String to null
        }
        read.setMateReferenceIndex(newMateIndex);

        out.addAlignment(read);
    }

    it.close();
    log.info("Wrote " + counter + " reads");
}
 
Example #30
Source File: Reader.java    From dataflow-java with Apache License 2.0 4 votes vote down vote up
/**
 * To compare how sharded reading works vs. plain HTSJDK sequential iteration,
 * this method implements such iteration.
 * This makes it easier to discover errors such as reads that are somehow
 * skipped by a sharded approach.
 */
public static Iterable<Read> readSequentiallyForTesting(Objects storageClient,
    String storagePath, Contig contig,
    ReaderOptions options) throws IOException {
  Stopwatch timer = Stopwatch.createStarted();
  SamReader samReader = BAMIO.openBAM(storageClient, storagePath, options.getStringency());
  SAMRecordIterator iterator =  samReader.queryOverlapping(contig.referenceName,
      (int) contig.start + 1,
      (int) contig.end);
  List<Read> reads = new ArrayList<Read>();

  int recordsBeforeStart = 0;
  int recordsAfterEnd = 0;
  int mismatchedSequence = 0;
  int recordsProcessed = 0;
  Filter filter = setupFilter(options, contig.referenceName);
  while (iterator.hasNext()) {
    SAMRecord record = iterator.next();
    final boolean passesFilter = passesFilter(record, filter, contig.referenceName);

    if (!passesFilter) {
      mismatchedSequence++;
      continue;
    }
    if (record.getAlignmentStart() < contig.start) {
      recordsBeforeStart++;
      continue;
    }
    if (record.getAlignmentStart() > contig.end) {
      recordsAfterEnd++;
      continue;
    }
    reads.add(ReadUtils.makeReadGrpc(record));
    recordsProcessed++;
  }
  timer.stop();
  LOG.info("NON SHARDED: Processed " + recordsProcessed +
      " in " + timer +
      ". Speed: " + (recordsProcessed*1000)/timer.elapsed(TimeUnit.MILLISECONDS) + " reads/sec"
      + ", skipped other sequences " + mismatchedSequence
      + ", skippedBefore " + recordsBeforeStart
      + ", skipped after " + recordsAfterEnd);
  return reads;
}