htsjdk.samtools.util.BinaryCodec Java Examples

The following examples show how to use htsjdk.samtools.util.BinaryCodec. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BamToBfqWriter.java    From picard with MIT License 5 votes vote down vote up
/**
 * Writes out a SAMRecord in Maq fastq format.
 * <p>
 * Output order: the read name (length-prefixed, null-terminated), the sequence
 * length as an int, then the encoded bases and qualities.
 *
 * @param codec the codec to write to
 * @param rec   the SAMRecord to write
 */
private void writeFastqRecord(final BinaryCodec codec, final SAMRecord rec) {

    // Trim the run barcode off the read name
    String readName = rec.getReadName();
    if (namePrefix != null && readName.startsWith(namePrefix)) {
        readName = readName.substring(nameTrim);
    }
    // Writes the length of the read name and then the name (null-terminated)
    codec.writeString(readName, true, true);

    final char[] seqs = rec.getReadString().toCharArray();
    final char[] quals = rec.getBaseQualityString().toCharArray();

    // By default every base is retained; adapter clipping may shorten this below
    int retainedLength = seqs.length;
    if (clipAdapters) {
        // adjust to a shorter length iff clipping tag exists
        final Integer trimPoint = rec.getIntegerAttribute(ReservedTagConstants.XT);
        if (trimPoint != null) {
            ValidationUtils.validateArg(rec.getReadLength() == seqs.length,
                    () -> "length of read and seqs differ. Found " + rec.getReadLength() + " and " + seqs.length + ".");

            // Keep at least SEED_REGION_LENGTH bases, but never more than the read actually has
            retainedLength = Math.min(seqs.length, Math.max(SEED_REGION_LENGTH, trimPoint - 1));
        }
    }

    // Write the length of the sequence: basesToWrite when configured, otherwise the full read length.
    // Note that retainedLength does not change the length written here; it is only handed to the encoder.
    codec.writeInt(basesToWrite != null ? basesToWrite : seqs.length);

    // Calculate and write the sequence and qualities
    final byte[] seqsAndQuals = encodeSeqsAndQuals(seqs, quals, retainedLength);
    codec.writeBytes(seqsAndQuals);
}
 
Example #2
Source File: BinaryBAMShardIndexWriter.java    From dataflow-java with Apache License 2.0 5 votes vote down vote up
/**
 * @param nRef Number of reference sequences. If zero is passed then header is not written.
 * This is useful in sharded writing as we only want the header written for the first shard.
 * 
 * @param output BAM index output stream.  This stream will be closed when BinaryBAMIndexWriter.close() is called.
 */
public BinaryBAMShardIndexWriter(final int nRef, final OutputStream output) {
    try {
        codec = new BinaryCodec(output);
        if (nRef > 0) {
          writeHeader(nRef);
        }
    } catch (final Exception e) {
        throw new SAMException("Exception opening output stream", e);
    }
}
 
Example #3
Source File: SparkUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Private helper method for {@link #convertHeaderlessHadoopBamShardToBam}: emits the BAM magic bytes, the
 * SAM header text and the binary sequence dictionary for the given header onto the provided
 * `OutputStream`, going through a block-compressed stream that is flushed (not closed) at the end.
 *
 * @param samFileHeader SAM header to write
 * @param outputStream stream to write the SAM header to
 */
private static void writeBAMHeaderToStream( final SAMFileHeader samFileHeader, final OutputStream outputStream ) {
    final BlockCompressedOutputStream compressedOut = new BlockCompressedOutputStream(outputStream, (File)null);
    final BinaryCodec bamCodec = new BinaryCodec(new DataOutputStream(compressedOut));

    // Render the header object as SAM text
    final Writer headerTextWriter = new StringWriter();
    new SAMTextHeaderCodec().encode(headerTextWriter, samFileHeader, true);
    final String headerText = headerTextWriter.toString();

    bamCodec.writeBytes(ReadUtils.BAM_MAGIC);

    // header text, preceded by its length
    bamCodec.writeString(headerText, true, false);

    // binary copy of the sequence dictionary (duplicates what the text header already says):
    // the entry count, then name + length for each sequence
    bamCodec.writeInt(samFileHeader.getSequenceDictionary().size());
    for (final SAMSequenceRecord seq : samFileHeader.getSequenceDictionary().getSequences()) {
        bamCodec.writeString(seq.getSequenceName(), true, true);
        bamCodec.writeInt(seq.getSequenceLength());
    }

    try {
        compressedOut.flush();
    } catch (final IOException flushFailure) {
        throw new RuntimeIOException(flushFailure);
    }
}
 
Example #4
Source File: Bam_OBA_Supplier.java    From cramtools with Apache License 2.0 5 votes vote down vote up
/**
 * Sets up a supplier that reads from the given buffered stream through a {@code BinaryCodec}.
 *
 * @param is the input stream to read from; it is also attached to the codec
 */
public Bam_OBA_Supplier(BufferedInputStream is) {
	this.is = is;

	// The codec is created empty and then pointed at the same stream
	this.codec = new BinaryCodec();
	this.codec.setInputStream(is);

	this.baos = new ByteArrayOutputStream();

	// NOTE(review): Integer.MIN_VALUE presumably marks "no reference seen yet" - confirm against usage
	this.refId = Integer.MIN_VALUE;
	this.recordCounter = 0;
}
 
Example #5
Source File: SAMFileHeader_Utils.java    From cramtools with Apache License 2.0 5 votes vote down vote up
/**
 * Reads one sequence entry from the binary header: an int name length, the name, a
 * one-byte terminator, and an int sequence length.
 *
 * @param stream codec positioned at the start of a sequence entry
 * @param source name of the file being read, used only in the error message
 * @return a record built from the (truncated) sequence name and the sequence length
 * @throws SAMFormatException if the stored name length is 1 or less
 */
private static SAMSequenceRecord readSequenceRecord(final BinaryCodec stream, final String source) {
	// The stored length counts the trailing null terminator
	final int lengthWithTerminator = stream.readInt();
	if (lengthWithTerminator <= 1) {
		throw new SAMFormatException("Invalid BAM file header: missing sequence name in file " + source);
	}

	final String rawName = stream.readString(lengthWithTerminator - 1);
	stream.readByte(); // consume the null terminator

	final int length = stream.readInt();
	final String name = SAMSequenceRecord.truncateSequenceName(rawName);
	return new SAMSequenceRecord(name, length);
}
 
Example #6
Source File: SAMFileHeader_Utils.java    From cramtools with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a BAM header: the magic bytes, the length-prefixed header text, and the
 * binary list of reference sequences (count, then name and length per sequence).
 *
 * @param outputBinaryCodec codec to write to
 * @param samFileHeader header whose sequence dictionary is written in binary form
 * @param headerText text form of the header, written as given
 */
static void writeHeader(final BinaryCodec outputBinaryCodec, final SAMFileHeader samFileHeader,
		final String headerText) {
	// The magic "BAM\1" is spelled out as bytes so the output does not depend on the
	// platform default charset (which String.getBytes() with no argument would use).
	final byte[] bamMagic = { 'B', 'A', 'M', 1 };
	outputBinaryCodec.writeBytes(bamMagic);

	// calculate and write the length of the SAM file header text and the
	// header text
	outputBinaryCodec.writeString(headerText, true, false);

	// write the sequences binarily. This is redundant with the text header
	outputBinaryCodec.writeInt(samFileHeader.getSequenceDictionary().size());
	for (final SAMSequenceRecord sequenceRecord : samFileHeader.getSequenceDictionary().getSequences()) {
		outputBinaryCodec.writeString(sequenceRecord.getSequenceName(), true, true);
		outputBinaryCodec.writeInt(sequenceRecord.getSequenceLength());
	}
}
 
Example #7
Source File: SAMFileHeader_Utils.java    From cramtools with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a header to a BAM file, generating the text form of the header
 * from the given header object first. Callers that already hold both the
 * header object and its text should call the overload that accepts both.
 *
 * @param outputBinaryCodec codec to write to
 * @param samFileHeader header to encode and write
 */
static void writeHeader(final BinaryCodec outputBinaryCodec, final SAMFileHeader samFileHeader) {
	// The text is re-encoded here rather than taken from
	// SAMFileHeader.getTextHeader(), because that is not updated when
	// changes to the underlying object are made
	final Writer textSink = new StringWriter();
	new SAMTextHeaderCodec().encode(textSink, samFileHeader, true);
	final String encodedHeader = textSink.toString();

	writeHeader(outputBinaryCodec, samFileHeader, encodedHeader);
}
 
Example #8
Source File: SAMFileHeader_Utils.java    From cramtools with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the BAM header for the given header object to a raw output stream,
 * going through a block-compressed stream that is flushed (but not closed)
 * afterwards.
 *
 * @param outputStream destination stream
 * @param samFileHeader header to write
 * @throws RuntimeIOException if flushing the compressed stream fails
 */
protected static void writeHeader(final OutputStream outputStream, final SAMFileHeader samFileHeader) {
	final BlockCompressedOutputStream compressedOut = new BlockCompressedOutputStream(outputStream,
			null);
	final BinaryCodec headerCodec = new BinaryCodec(new DataOutputStream(compressedOut));

	// Delegate the actual header layout to the codec-based overload
	writeHeader(headerCodec, samFileHeader);

	try {
		compressedOut.flush();
	} catch (final IOException flushFailure) {
		throw new RuntimeIOException(flushFailure);
	}
}
 
Example #9
Source File: SAMFileHeader_Utils.java    From cramtools with Apache License 2.0 4 votes vote down vote up
/**
 * Reads a BAM header: checks the 4 magic bytes, decodes the header text, then
 * reads the binary sequence list and reconciles it with the text header.
 * <p>
 * When the text header already has sequences, each binary sequence must match
 * the text one in count, name and length. When the text header has none, the
 * binary sequences become the header's sequence dictionary.
 *
 * @param stream codec positioned at the start of the BAM header
 * @param validationStringency stringency applied when decoding the header text
 * @param source name of the file being read, used for decoding and error messages
 * @return the decoded header
 * @throws IOException if the stream does not start with the BAM magic bytes
 * @throws SAMFormatException if the text and binary sequence information disagree
 */
static SAMFileHeader readHeader(final BinaryCodec stream, final ValidationStringency validationStringency,
		final String source) throws IOException {

	// The magic "BAM\1" is spelled out as bytes so the comparison does not depend on
	// the platform default charset (which String.getBytes() with no argument would use).
	final byte[] expectedMagic = { 'B', 'A', 'M', 1 };
	final byte[] buffer = new byte[4];
	stream.readBytes(buffer);
	if (!Arrays.equals(buffer, expectedMagic)) {
		throw new IOException("Invalid BAM file header");
	}

	// Header text is stored as an int length followed by that many characters
	final int headerTextLength = stream.readInt();
	final String textHeader = stream.readString(headerTextLength);
	final SAMTextHeaderCodec headerCodec = new SAMTextHeaderCodec();
	headerCodec.setValidationStringency(validationStringency);
	final SAMFileHeader samFileHeader = headerCodec.decode(new StringLineReader(textHeader), source);

	final int sequenceCount = stream.readInt();
	if (samFileHeader.getSequenceDictionary().size() > 0) {
		// It is allowed to have binary sequences but no text sequences, so
		// only validate if both are present
		if (sequenceCount != samFileHeader.getSequenceDictionary().size()) {
			throw new SAMFormatException("Number of sequences in text header ("
					+ samFileHeader.getSequenceDictionary().size() + ") != number of sequences in binary header ("
					+ sequenceCount + ") for file " + source);
		}
		// Compare each binary entry with the text entry at the same index
		for (int i = 0; i < sequenceCount; i++) {
			final SAMSequenceRecord binarySequenceRecord = readSequenceRecord(stream, source);
			final SAMSequenceRecord sequenceRecord = samFileHeader.getSequence(i);
			if (!sequenceRecord.getSequenceName().equals(binarySequenceRecord.getSequenceName())) {
				throw new SAMFormatException("For sequence " + i
						+ ", text and binary have different names in file " + source);
			}
			if (sequenceRecord.getSequenceLength() != binarySequenceRecord.getSequenceLength()) {
				throw new SAMFormatException("For sequence " + i
						+ ", text and binary have different lengths in file " + source);
			}
		}
	} else {
		// If only binary sequences are present, copy them into
		// samFileHeader
		final List<SAMSequenceRecord> sequences = new ArrayList<SAMSequenceRecord>(sequenceCount);
		for (int i = 0; i < sequenceCount; i++) {
			sequences.add(readSequenceRecord(stream, source));
		}
		samFileHeader.setSequenceDictionary(new SAMSequenceDictionary(sequences));
	}

	return samFileHeader;
}