htsjdk.samtools.SAMFormatException Java Examples

The following examples show how to use htsjdk.samtools.SAMFormatException. Each example is taken from an open-source project; the source file and project are listed above the code.
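
SAMFormatException is htsjdk's unchecked exception for input that cannot be parsed as SAM or BAM. As a minimal sketch of how it typically surfaces, assuming a hypothetical malformed.sam whose records violate the SAM spec (the file name and class name below are illustrative and not taken from the projects that follow):

import htsjdk.samtools.SAMFormatException;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;

import java.io.File;
import java.io.IOException;

public class SamFormatExceptionDemo {
    public static void main(String[] args) throws IOException {
        // Hypothetical input; any file whose records violate the SAM spec will do.
        final File malformedSam = new File("malformed.sam");
        try (SamReader reader = SamReaderFactory.makeDefault()
                .validationStringency(ValidationStringency.STRICT)
                .open(malformedSam)) {
            for (final SAMRecord record : reader) {
                System.out.println(record.getReadName());
            }
        } catch (SAMFormatException e) {
            // Parsing failed: the stream is not a well-formed SAM/BAM file.
            System.err.println("Not a valid SAM/BAM file: " + e.getMessage());
        }
    }
}
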
Example #1
Source File: SamToFastqTest.java    From picard with MIT License
@Test (dataProvider = "badFiles", expectedExceptions= SAMFormatException.class)
public void testBadFile(final String samFilename) throws IOException {
    final File samFile = new File(TEST_DATA_DIR,samFilename);
    final File pair1 = File.createTempFile("tt-pair1.", ".fastq");
    final File pair2 = File.createTempFile("tt-pair2.", ".fastq");
    pair1.deleteOnExit();
    pair2.deleteOnExit();
    convertFile(new String[]{
          "INPUT=" + samFile.getAbsolutePath(),
          "FASTQ=" + pair1.getAbsolutePath(),
          "SECOND_END_FASTQ=" + pair2.getAbsolutePath()
    });
}
 
Example #2
Source File: SamToFastqTest.java    From picard with MIT License
@Test (dataProvider = "badGroupedFiles", expectedExceptions=SAMFormatException.class)
public void testBadGroupedFile(final String samFilename) throws IOException {
    final File pair1File = newTempFastqFile("pair1");
    final File pair2File = newTempFastqFile("pair2");

    convertFile(new String[]{
            "INPUT=" + TEST_DATA_DIR + "/" + samFilename,
            "FASTQ=" + pair1File.getAbsolutePath(),
            "SECOND_END_FASTQ=" + pair2File.getAbsolutePath()
    });
}
 
Example #3
Source File: SortSamTest.java    From picard with MIT License
@Test
public void bugTest() throws Exception {
    File input = File.createTempFile("testIn", ".bam");
    File output = File.createTempFile("testOut", ".bam");
    input.deleteOnExit();
    FileUtils.write(input, "not valid sam input");
    try {
        new SortSam().instanceMain(new String[]{
                "I=" + input.getPath(),
                "O=" + output.getPath(),
                "SORT_ORDER=coordinate"});
    } catch (SAMFormatException ex) {
        // Expected: the input is not valid SAM, so SortSam fails while parsing it.
    }
    // Deleting the output confirms it can still be removed after the failure
    // (Files.delete throws if the deletion does not succeed).
    Files.delete(output.toPath());
}
 
Example #4
Source File: BAMSplitGuesser.java    From Hadoop-BAM with MIT License
/** The stream must point to a valid BAM file, because the header is read
 * from it.
 */
public BAMSplitGuesser(
		SeekableStream ss, Configuration conf)
	throws IOException
{
	this(ss, ss, conf);

	// Secondary check that the header points to a BAM file: Picard can get
	// things wrong due to its autodetection.
	ss.seek(0);
	if (ss.read(buf.array(), 0, 4) != 4 || buf.getInt(0) != BGZF_MAGIC)
		throw new SAMFormatException("Does not seem like a BAM file");
}
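
The secondary check above compares the first four bytes of the stream against the BGZF magic; BGZF, the blocked gzip framing used by BAM, starts every block with the gzip bytes 1f 8b 08 04. Below is a small standalone sketch of the same idea, assuming that BGZF_MAGIC and buf in the class above hold those four bytes as a little-endian int (they are fields of the surrounding class and are not shown in the snippet):

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

public class BgzfMagicCheck {
    // First four bytes of every BGZF block: gzip ID1, ID2, deflate method, FEXTRA flag.
    private static final byte[] BGZF_MAGIC_BYTES = {0x1f, (byte) 0x8b, 0x08, 0x04};

    /** Returns true if the stream begins with the BGZF magic bytes. */
    public static boolean looksLikeBgzf(InputStream in) throws IOException {
        final byte[] head = new byte[4];
        int read = 0;
        while (read < 4) {
            final int n = in.read(head, read, 4 - read);
            if (n < 0) {
                return false; // fewer than four bytes: cannot be BGZF
            }
            read += n;
        }
        for (int i = 0; i < 4; i++) {
            if (head[i] != BGZF_MAGIC_BYTES[i]) {
                return false;
            }
        }
        return true;
    }

    public static void main(String[] args) throws IOException {
        // Hypothetical path, for illustration only.
        try (InputStream in = Files.newInputStream(Paths.get("example.bam"))) {
            System.out.println("BGZF? " + looksLikeBgzf(in));
        }
    }
}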
 
Example #5
Source File: SAMFileHeader_Utils.java    From cramtools with Apache License 2.0
private static SAMSequenceRecord readSequenceRecord(final BinaryCodec stream, final String source) {
	final int nameLength = stream.readInt();
	if (nameLength <= 1) {
		throw new SAMFormatException("Invalid BAM file header: missing sequence name in file " + source);
	}
	final String sequenceName = stream.readString(nameLength - 1);
	// Skip the null terminator
	stream.readByte();
	final int sequenceLength = stream.readInt();
	return new SAMSequenceRecord(SAMSequenceRecord.truncateSequenceName(sequenceName), sequenceLength);
}
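
readSequenceRecord follows the layout of one reference entry in a binary BAM header: l_name (a 32-bit little-endian length that includes the trailing NUL), the NUL-terminated name itself, and l_ref (the 32-bit reference length). A sketch of how a conforming entry could be encoded, for illustration only (this writer is not part of cramtools):

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;

public class SequenceRecordEncoder {
    /**
     * Encodes one BAM header reference entry:
     * l_name (int32, includes NUL) + name + NUL + l_ref (int32), little-endian.
     */
    public static byte[] encode(String sequenceName, int sequenceLength) {
        final byte[] name = sequenceName.getBytes(StandardCharsets.US_ASCII);
        final ByteBuffer buf = ByteBuffer.allocate(4 + name.length + 1 + 4)
                .order(ByteOrder.LITTLE_ENDIAN);
        buf.putInt(name.length + 1);   // l_name counts the terminating NUL
        buf.put(name);
        buf.put((byte) 0);             // NUL terminator, skipped by the reader above
        buf.putInt(sequenceLength);    // l_ref
        return buf.array();
    }

    public static void main(String[] args) {
        final byte[] entry = encode("chr1", 248_956_422);
        System.out.println("Encoded " + entry.length + " bytes");
    }
}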
 
Example #6
Source File: pullLargeLengths.java    From HMMRATAC with GNU General Public License v3.0
/**
 * Read the data and create a list of lengths
 */
private void read(){
	int counter = 0;
	SAMFileReader reader = new SAMFileReader(bam,index);
	ArrayList<Double> temp = new ArrayList<Double>();
	for (int i = 0; i < genome.size();i++){
		String chr = genome.get(i).getChrom();
		int start = genome.get(i).getStart();
		int stop = genome.get(i).getStop();
		CloseableIterator<SAMRecord> iter = reader.query(chr,start,stop,false);
		while (iter.hasNext()){
			SAMRecord record = null;
			try{
				record = iter.next();
			}
			catch(SAMFormatException ex){
				System.out.println("SAM Record is problematic. Has mapQ != 0 for unmapped read. Will continue anyway");
			}
			if (record != null) {
				if (!record.getReadUnmappedFlag() && !record.getMateUnmappedFlag() && record.getFirstOfPairFlag()) {
					if (record.getMappingQuality() >= minQ) {
						if (Math.abs(record.getInferredInsertSize()) > 100
								&& Math.abs(record.getInferredInsertSize()) < 1000) {
							counter += 1;
							temp.add((double) Math.abs(record.getInferredInsertSize()));
						}
					}
				}
			}
		}
		iter.close();
	}
	reader.close();
	// Copy the collected insert sizes into the output array; every value gathered
	// above is already greater than 100, so this check never filters anything out.
	lengths = new double[counter];
	for (int i = 0; i < temp.size(); i++){
		if (temp.get(i) > 100){
			lengths[i] = temp.get(i);
		}
	}
	
}
 
Example #7
Source File: SAMRecordReader.java    From Hadoop-BAM with MIT License
@Override public void initialize(InputSplit spl, TaskAttemptContext ctx)
	throws IOException
{
	// This method should only be called once (see Hadoop API). However,
	// there seems to be disagreement between implementations that call
	// initialize() and Hadoop-BAM's own code that relies on
	// {@link SAMInputFormat} to call initialize() when the reader is
	// created. Therefore we add this check for the time being. 
	if(isInitialized)
		close();
	isInitialized = true;

	final FileSplit split = (FileSplit)spl;

	this.start =         split.getStart();
	this.end   = start + split.getLength();

	final Configuration conf = ctx.getConfiguration();

	final ValidationStringency stringency =
		SAMHeaderReader.getValidationStringency(conf);

	final Path file = split.getPath();
	final FileSystem fs = file.getFileSystem(conf);

	input = fs.open(file);

	// SAMFileReader likes to make our life difficult, so complexity ensues.
	// The basic problem is that SAMFileReader buffers its input internally,
	// which causes two issues.
	//
	// Issue #1 is that SAMFileReader requires that its input begins with a
	// SAM header. This is not fine for reading from the middle of a file.
	// Because of the buffering, if we have the reader read the header from
	// the beginning of the file and then seek to where we want to read
	// records from, it'll have buffered some records from immediately after
	// the header, which is no good. Thus we need to read the header
	// separately and then use a custom stream that wraps the input stream,
	// inserting the header at the beginning of it. (Note the spurious
	// re-encoding of the header so that the reader can decode it.)
	//
	// Issue #2 is handling the boundary between two input splits. The best
	// way seems to be the classic "in later splits, skip the first line, and
	// in every split finish reading a partial line at the end of the split",
	// but that latter part is a bit complicated here. Due to the buffering,
	// we can easily overshoot: as soon as the stream moves past the end of
	// the split, SAMFileReader has buffered some records past the end. The
	// basic fix here is to have our custom stream count the number of bytes
	// read and to stop after the split size. Unfortunately this prevents us
	// from reading the last partial line, so our stream actually allows
	// reading to the next newline after the actual end.

	final SAMFileHeader header = createSamReader(input, stringency).getFileHeader();

	waInput = new WorkaroundingStream(input, header);

	final boolean firstSplit = this.start == 0;

	if (firstSplit) {
		// Skip the header because we already have it, and adjust the start
		// to match.
		final int headerLength = waInput.getRemainingHeaderLength();
		input.seek(headerLength);
		this.start += headerLength;
	} else
		input.seek(--this.start);

	// Creating the iterator causes reading from the stream, so make sure
	// to start counting this early.
	waInput.setLength(this.end - this.start);

	iterator = createSamReader(waInput, stringency).iterator();

	if (!firstSplit) {
		// Skip the first line, it'll be handled with the previous split.
		try {
			if (iterator.hasNext())
				iterator.next();
		} catch (SAMFormatException e) {}
	}
}
 
Example #8
Source File: SAMFileHeader_Utils.java    From cramtools with Apache License 2.0
static SAMFileHeader readHeader(final BinaryCodec stream, final ValidationStringency validationStringency,
		final String source) throws IOException {

	final byte[] buffer = new byte[4];
	stream.readBytes(buffer);
	if (!Arrays.equals(buffer, "BAM\1".getBytes())) {
		throw new IOException("Invalid BAM file header");
	}

	final int headerTextLength = stream.readInt();
	final String textHeader = stream.readString(headerTextLength);
	final SAMTextHeaderCodec headerCodec = new SAMTextHeaderCodec();
	headerCodec.setValidationStringency(validationStringency);
	final SAMFileHeader samFileHeader = headerCodec.decode(new StringLineReader(textHeader), source);

	final int sequenceCount = stream.readInt();
	if (samFileHeader.getSequenceDictionary().size() > 0) {
		// It is allowed to have binary sequences but no text sequences, so
		// only validate if both are present
		if (sequenceCount != samFileHeader.getSequenceDictionary().size()) {
			throw new SAMFormatException("Number of sequences in text header ("
					+ samFileHeader.getSequenceDictionary().size() + ") != number of sequences in binary header ("
					+ sequenceCount + ") for file " + source);
		}
		for (int i = 0; i < sequenceCount; i++) {
			final SAMSequenceRecord binarySequenceRecord = readSequenceRecord(stream, source);
			final SAMSequenceRecord sequenceRecord = samFileHeader.getSequence(i);
			if (!sequenceRecord.getSequenceName().equals(binarySequenceRecord.getSequenceName())) {
				throw new SAMFormatException("For sequence " + i
						+ ", text and binary have different names in file " + source);
			}
			if (sequenceRecord.getSequenceLength() != binarySequenceRecord.getSequenceLength()) {
				throw new SAMFormatException("For sequence " + i
						+ ", text and binary have different lengths in file " + source);
			}
		}
	} else {
		// If only binary sequences are present, copy them into
		// samFileHeader
		final List<SAMSequenceRecord> sequences = new ArrayList<SAMSequenceRecord>(sequenceCount);
		for (int i = 0; i < sequenceCount; i++) {
			sequences.add(readSequenceRecord(stream, source));
		}
		samFileHeader.setSequenceDictionary(new SAMSequenceDictionary(sequences));
	}

	return samFileHeader;
}
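
A hypothetical driver for readHeader, for illustration only: it assumes a caller in the same package as SAMFileHeader_Utils (the method is package-private) and an example BAM file name that is not from the original project. The BAM container is BGZF-compressed, so the codec must wrap a BlockCompressedInputStream rather than the raw file stream:

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.BlockCompressedInputStream;

import java.io.File;
import java.io.IOException;

public class ReadHeaderDemo {
    public static void main(String[] args) throws IOException {
        final File bam = new File("example.bam"); // hypothetical input
        try (BlockCompressedInputStream in = new BlockCompressedInputStream(bam)) {
            final BinaryCodec codec = new BinaryCodec(in);
            final SAMFileHeader header =
                    SAMFileHeader_Utils.readHeader(codec, ValidationStringency.STRICT, bam.getName());
            System.out.println(header.getSequenceDictionary().size() + " reference sequences");
        }
    }
}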