Java Code Examples for htsjdk.samtools.util.CloseableIterator#next()

The following examples show how to use htsjdk.samtools.util.CloseableIterator#next(). Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
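Before the project examples, here is a minimal sketch of the canonical pattern. CloseableIterator<T> extends both java.util.Iterator<T> and java.io.Closeable: next() is called only after hasNext() returns true, and the iterator is closed when iteration ends. Because it is Closeable, try-with-resources can handle the cleanup. The helper name countVariants and the input file are illustrative, not taken from any project below.

import htsjdk.samtools.util.CloseableIterator;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFFileReader;

import java.io.File;

static int countVariants(final File vcf) {
    // The reader and the iterator both implement Closeable, so
    // try-with-resources closes them even if iteration throws.
    try (VCFFileReader reader = new VCFFileReader(vcf, false);
         CloseableIterator<VariantContext> it = reader.iterator()) {
        int n = 0;
        while (it.hasNext()) {
            it.next(); // advance; next() is only safe after hasNext() returns true
            n++;
        }
        return n;
    }
}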
Example 1
Source File: SingleCellRnaSeqMetricsCollector.java    From Drop-seq with MIT License
RnaSeqMetricsCollector getRNASeqMetricsCollector(final String cellBarcodeTag, final List<String> cellBarcodes, final File inBAM,
		final RnaSeqMetricsCollector.StrandSpecificity strand, final double rRNAFragmentPCT, final int readMQ,
		final File annotationsFile, final File rRNAIntervalsFile) {

	CollectorFactory factory = new CollectorFactory(inBAM, strand, rRNAFragmentPCT, annotationsFile, rRNAIntervalsFile);
	RnaSeqMetricsCollector collector = factory.getCollector(cellBarcodes);
	List<SAMReadGroupRecord> rg = factory.getReadGroups(cellBarcodes);

	// iterate by cell barcodes.  Skip all the reads without cell barcodes.
	CloseableIterator<SAMRecord> iter = getReadsInTagOrder(inBAM, cellBarcodeTag, rg, cellBarcodes, readMQ);

	ProgressLogger p = new ProgressLogger(log, 1000000, "Accumulating metrics");
	while (iter.hasNext()) {
		SAMRecord r = iter.next();
		String cellBarcode = r.getStringAttribute(cellBarcodeTag);
		r.setAttribute("RG", cellBarcode);
		p.record(r);
		collector.acceptRecord(r, null);
	}
	iter.close(); // release the tag-sorted iterator before finalizing the metrics
	collector.finish();
	return collector;
}
 
Example 2
Source File: SortVcfsTest.java    From picard with MIT License
/**
 * Checks the ordering and total number of variant context entries in the specified output VCF file.
 * Does NOT check explicitly that the VC genomic positions match exactly those from the inputs. We assume this behavior from other tests.
 *
 * @param output VCF file representing the output of SortVCF
 * @param expectedVariantContextCount the total number of variant context entries from all input files that were merged/sorted
 */
private void validateSortingResults(final File output, final int expectedVariantContextCount) {
    final VCFFileReader outputReader = new VCFFileReader(output, false);
    final VariantContextComparator outputComparator = outputReader.getFileHeader().getVCFRecordComparator();
    VariantContext last = null;
    int variantContextCount = 0;
    final CloseableIterator<VariantContext> iterator = outputReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext outputContext = iterator.next();
        if (last != null) Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0);
        last = outputContext;
        variantContextCount++;
    }
    iterator.close();
    outputReader.close();
    Assert.assertEquals(variantContextCount, expectedVariantContextCount);
}
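Because CloseableIterator implements Closeable, the manual iterator.close() above can also be expressed with try-with-resources, which guarantees the close even when an assertion fails mid-loop. A sketch reusing the names from the example above:

try (CloseableIterator<VariantContext> iterator = outputReader.iterator()) {
    while (iterator.hasNext()) {
        final VariantContext outputContext = iterator.next();
        if (last != null) Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0);
        last = outputContext;
        variantContextCount++;
    }
} // iterator is closed here even if an Assert throws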
 
Example 3
Source File: TestFilterVcf.java    From picard with MIT License
@Test(dataProvider = "goodInputVcfs")
public void testJavaScript(final File input) throws Exception {
    final File out = VcfTestUtils.createTemporaryIndexedFile("filterVcfTestJS.", ".vcf");
    final FilterVcf filterer = new FilterVcf();
    filterer.INPUT = input;
    filterer.OUTPUT = out;
    filterer.JAVASCRIPT_FILE = quickJavascriptFilter("variant.getStart()%5 != 0");

    final int retval = filterer.doWork();
    Assert.assertEquals(retval, 0);

    // count the number of variant records that were marked as filtered
    final int expectedNumber = 4;
    int count = 0;
    VCFFileReader in = new VCFFileReader(filterer.OUTPUT, false);
    CloseableIterator<VariantContext> iter = in.iterator();
    while (iter.hasNext()) {
        final VariantContext ctx = iter.next();
        count += (ctx.isFiltered() ? 1 : 0);
    }
    iter.close();
    in.close();
    Assert.assertEquals(count, expectedNumber);
}
 
Example 4
Source File: VcfFormatConverter.java    From picard with MIT License
@Override
protected int doWork() {
    final ProgressLogger progress = new ProgressLogger(LOG, 10000);
    
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final VCFFileReader reader = new VCFFileReader(INPUT, REQUIRE_INDEX);
    final VCFHeader header = new VCFHeader(reader.getFileHeader());
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available in the input file when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    final VariantContextWriter writer = builder.build();
    writer.writeHeader(header);
    final CloseableIterator<VariantContext> iterator = reader.iterator();

    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        writer.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(reader);
    writer.close();

    return 0;
}
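A note on the teardown above: CloserUtil.close is an htsjdk convenience that accepts any object, closes it if it is Closeable, and tolerates null, so the iterator and reader can be released without checked-exception boilerplate; the writer is closed directly so that an on-the-fly index, if requested, is finalized.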
 
Example 5
Source File: CleanSam.java    From picard with MIT License
/**
 * Do the work after command line has been parsed.
 * RuntimeExceptions may be thrown by this method and are reported appropriately.
 *
 * @return program exit status.
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReaderFactory factory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE);
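    // Note: downgrading STRICT to LENIENT here is presumably deliberate, so that records
    // this tool is about to clean do not fail strict validation while being read.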
    if (VALIDATION_STRINGENCY == ValidationStringency.STRICT) {
        factory.validationStringency(ValidationStringency.LENIENT);
    }
    final SamReader reader = factory.open(INPUT);
    final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT);
    final CloseableIterator<SAMRecord> it = reader.iterator();
    final ProgressLogger progress = new ProgressLogger(Log.getInstance(CleanSam.class));

    while (it.hasNext()) {
        final SAMRecord rec = it.next();

        // If the read (or its mate) maps off the end of the alignment, clip it
        AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(rec);

        // check the read's mapping quality
        if (rec.getReadUnmappedFlag() && 0 != rec.getMappingQuality()) {
            rec.setMappingQuality(0);
        }

        writer.addAlignment(rec);
        progress.record(rec);
    }

    writer.close();
    it.close();
    CloserUtil.close(reader);
    return 0;
}
 
Example 6
Source File: InputValidationTest.java    From imputationserver with GNU Affero General Public License v3.0
public void testTabixIndexCreationChr1() throws IOException {

	String configFolder = "test-data/configs/hapmap-chr1";
	// input folder contains a single vcf.gz file
	String inputFolder = "test-data/data/single";

	// create workflow context
	WorkflowTestContext context = buildContext(inputFolder, "hapmap2");
	context.setInput("phasing", "eagle");

	// create step instance
	InputValidation inputValidation = new InputValidationMock(configFolder);

	// run and test
	boolean result = run(context, inputValidation);

	// check that the step ran successfully
	assertEquals(true, result);
	assertTrue(context.hasInMemory("[OK] 1 valid VCF file(s) found."));

	// test tabix index and count snps
	String vcfFilename = inputFolder + "/minimac_test.50.vcf.gz";
	VCFFileReader vcfReader = new VCFFileReader(new File(vcfFilename),
			new File(vcfFilename + TabixUtils.STANDARD_INDEX_EXTENSION), true);
	CloseableIterator<VariantContext> snps = vcfReader.query("1", 1, 1000000000);
	int count = 0;
	while (snps.hasNext()) {
		snps.next();
		count++;
	}
	snps.close();
	vcfReader.close();

	// check snps
	assertEquals(905, count);
}
 
Example 7
Source File: InputValidationTest.java    From imputationserver with GNU Affero General Public License v3.0
public void testTabixIndexCreationChr20() throws IOException {

	String configFolder = "test-data/configs/hapmap-chr1";
	// input folder contains a single phased vcf.gz file
	String inputFolder = "test-data/data/chr20-phased";

	// create workflow context
	WorkflowTestContext context = buildContext(inputFolder, "hapmap2");

	// create step instance
	InputValidation inputValidation = new InputValidationMock(configFolder);

	// run and test
	boolean result = run(context, inputValidation);

	// check that the step ran successfully
	assertEquals(true, result);
	assertTrue(context.hasInMemory("[OK] 1 valid VCF file(s) found."));

	// test tabix index and count snps
	String vcfFilename = inputFolder + "/chr20.R50.merged.1.330k.recode.small.vcf.gz";
	VCFFileReader vcfReader = new VCFFileReader(new File(vcfFilename),
			new File(vcfFilename + TabixUtils.STANDARD_INDEX_EXTENSION), true);
	CloseableIterator<VariantContext> snps = vcfReader.query("20", 1, 1000000000);
	int count = 0;
	while (snps.hasNext()) {
		snps.next();
		count++;
	}
	snps.close();
	vcfReader.close();

	// check snps
	assertEquals(7824, count);
}
 
Example 8
Source File: FastQualityControlTest.java    From imputationserver with GNU Affero General Public License v3.0
public void testCountSamplesInCreatedChunk() throws IOException {

	String configFolder = "test-data/configs/hapmap-chr1";
	String inputFolder = "test-data/data/simulated-chip-1chr-imputation";

	// create workflow context
	WorkflowTestContext context = buildContext(inputFolder, "hapmap2");

	// get output directory
	String out = context.getOutput("chunksDir");

	// create step instance
	FastQualityControlMock qcStats = new FastQualityControlMock(configFolder);

	// run and test
	assertTrue(run(context, qcStats));
	for (File file : new File(out).listFiles()) {
		if (file.getName().endsWith("chunk_1_80000001_100000000.vcf.gz")) {
			VCFFileReader vcfReader = new VCFFileReader(file, false);
			CloseableIterator<VariantContext> it = vcfReader.iterator();
			if (it.hasNext()) {
				VariantContext a = it.next();
				assertEquals(255, a.getNSamples());
			}
			it.close();
			vcfReader.close();
		}
	}
}
 
Example 9
Source File: ThreadsafeTest.java    From picard with MIT License
@Test
public void ensureUniqueVariantObservationsEspeciallyMultiAllelicOnesThatAppearAtChunkingBoundaries() {
    final VariantIteratorProducer.Threadsafe iteratorFactory =
            new VariantIteratorProducer.Threadsafe(
                    VcfFileSegmentGenerator.byWholeContigSubdividingWithWidth(TEN_MILLION),
                    Arrays.asList(VCF_WITH_MULTI_ALLELIC_VARIANT_AT_POSITION_10MILLION)
            );
    final Set<String> observed = new HashSet<String>();
    for (final CloseableIterator<VariantContext> i : iteratorFactory.iterators()) {
        while (i.hasNext()) {
            final VariantContext next = i.next();
            Assert.assertTrue(observed.add(next.toString()), "Second observation for " + next.toString());
        }
        i.close(); // close each per-segment iterator once it is exhausted
    }
}
}
 
Example 10
Source File: IntervalTagComparatorTest.java    From Drop-seq with MIT License
/**
 * For this test, we generate a lot of data and sort it.
 * Since the SAMRecord doesn't really need to change, generate one static record and a LOT of intervals to tag the record with.
 * The size of the SAMRecord doesn't change, but the speed of the comparator should.
 */
@Test(enabled = false)
public void testSpeed() {
	int numRecords = 30000000;
	int readLength = 60;
	SamReader inputSam = SamReaderFactory.makeDefault().open(dictFile);
	SAMSequenceDictionary dict = inputSam.getFileHeader().getSequenceDictionary();
	dict = filterSD(dict);

	long start = System.currentTimeMillis();

	GenerateIntervalTaggedSAMRecords gen = new GenerateIntervalTaggedSAMRecords(dict, intervalTag, readLength, numRecords);
	IntervalTagComparator comparator = new IntervalTagComparator(this.intervalTag);

	final CloseableIterator<SAMRecord> sortingIterator =
			SamRecordSortingIteratorFactory.create(inputSam.getFileHeader(), gen, comparator, null);

	while (sortingIterator.hasNext()) {
		SAMRecord r = sortingIterator.next();
		Assert.assertNotNull(r);
	}
	long elapsed = System.currentTimeMillis() - start;
	sortingIterator.close(); // release the sorting collection's temporary files
	System.out.println("elapsed time = " + elapsed + " ms");
	System.out.println("elapsed time = " + Math.round(elapsed / 1000.0) + " seconds"); // divide as a double so Math.round has an effect
}
 
Example 11
Source File: SNPUMICellReadIteratorWrapperTest.java    From Drop-seq with MIT License
/**
 * Integration style test to see if I can throw null pointers, etc with a bit of real data.
 */
@Test(enabled = true)
public void processReads() {
	List<String> cellBarcodeList = ParseBarcodeFile.readCellBarcodeFile(cellBCFile);
	IntervalList loci = IntervalList.fromFile(snpIntervals);

	SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(bamFile);

	// Filter records before sorting, to reduce I/O
	final MissingTagFilteringIterator filteringIterator =
			new MissingTagFilteringIterator(reader.iterator(), GENE_NAME_TAG, cellBarcodeTag, molBCTag);

	MapQualityFilteredIterator filteringIterator2 = new MapQualityFilteredIterator(filteringIterator, readMQ, true);

	GeneFunctionIteratorWrapper geneFunctionIteratorWrapper = new GeneFunctionIteratorWrapper(filteringIterator2, GENE_NAME_TAG, GENE_STRAND_TAG, GENE_FUNCTION_TAG, false, STRAND_STRATEGY, LOCUS_FUNCTION_LIST);

	SNPUMICellReadIteratorWrapper snpumiCellReadIterator = new SNPUMICellReadIteratorWrapper(geneFunctionIteratorWrapper, loci, cellBarcodeTag, cellBarcodeList, GENE_NAME_TAG, snpTag, readMQ);

	// create comparators in the order the data should be sorted
	final MultiComparator<SAMRecord> multiComparator = new MultiComparator<>(
			new StringTagComparator(cellBarcodeTag),
			new StringTagComparator(GENE_NAME_TAG),
			new StringTagComparator(molBCTag));

	final CloseableIterator<SAMRecord> sortingIterator =
			SamRecordSortingIteratorFactory.create(reader.getFileHeader(), snpumiCellReadIterator, multiComparator, null);

	Assert.assertTrue(sortingIterator.hasNext());
	SAMRecord nextRead = sortingIterator.next();
	List<SAMTagAndValue> tagValues = nextRead.getAttributes();
	String snpTagValue = nextRead.getStringAttribute(this.snpTag);
	CloserUtil.close(snpumiCellReadIterator);
}
 
Example 12
Source File: ImputationChrXTest.java    From imputationserver with GNU Affero General Public License v3.0
@Test
public void testChrXLeaveOneOutPipelinePhased() throws IOException, ZipException {

	// SNP 26963697 from input excluded and imputed!
	// true genotypes:
	// 1,1|1,1|1,1|1,1,1|1,1,1|1,1|1,1,0,1|1,1|0,1,1,1,1,1,1|1,1,1|1,1|1,1|1,1|1,1|1,1|0,

	String configFolder = "test-data/configs/hapmap-chrX";
	String inputFolder = "test-data/data/chrX-phased-loo";

	File file = new File("test-data/tmp");
	if (file.exists()) {
		FileUtil.deleteDirectory(file);
	}

	// create workflow context
	WorkflowTestContext context = buildContext(inputFolder, "phase1");

	// run qc to create chunkfile
	QcStatisticsMock qcStats = new QcStatisticsMock(configFolder);
	boolean result = run(context, qcStats);

	assertTrue(result);

	// add panel to hdfs
	importRefPanel(FileUtil.path(configFolder, "ref-panels"));
	// importMinimacMap("test-data/B38_MAP_FILE.map");
	importBinaries("files/bin");

	// run imputation
	ImputationMinimac3Mock imputation = new ImputationMinimac3Mock(configFolder);
	result = run(context, imputation);
	assertTrue(result);

	// run export
	CompressionEncryptionMock export = new CompressionEncryptionMock("files");
	result = run(context, export);
	assertTrue(result);

	ZipFile zipFile = new ZipFile("test-data/tmp/local/chr_X.zip", PASSWORD.toCharArray());
	zipFile.extractAll("test-data/tmp");

	VcfFile vcfFile = VcfFileUtil.load("test-data/tmp/chrX.dose.vcf.gz", 100000000, false);

	VCFFileReader vcfReader = new VCFFileReader(new File(vcfFile.getVcfFilename()), false);

	CloseableIterator<VariantContext> it = vcfReader.iterator();

	while (it.hasNext()) {

		VariantContext line = it.next();

		if (line.getStart() == 26963697) {
			assertEquals(2, line.getHetCount());
			assertEquals(1, line.getHomRefCount());
			assertEquals(23, line.getHomVarCount());

		}
	}

	vcfReader.close();

	FileUtil.deleteDirectory(file);

}
 
Example 13
Source File: ImputationChrXTest.java    From imputationserver with GNU Affero General Public License v3.0
@Test
public void testPipelineChrXWithEaglePhasingOnly() throws IOException, ZipException {
	
	if (!new File(
			"test-data/configs/hapmap-chrX-hg38/ref-panels/ALL.X.nonPAR.phase1_v3.snps_indels_svs.genotypes.all.noSingleton.recode.hg38.bcf")
					.exists()) {
		System.out.println("chrX bcf nonPAR file not available");
		return;
	}


	String configFolder = "test-data/configs/hapmap-chrX";
	String inputFolder = "test-data/data/chrX-unphased";

	// create workflow context
	WorkflowTestContext context = buildContext(inputFolder, "phase1");
	
	context.setInput("mode", "phasing");

	// run qc to create chunkfile
	QcStatisticsMock qcStats = new QcStatisticsMock(configFolder);
	boolean result = run(context, qcStats);

	assertTrue(result);

	// add panel to hdfs
	importRefPanel(FileUtil.path(configFolder, "ref-panels"));
	// importMinimacMap("test-data/B38_MAP_FILE.map");
	importBinaries("files/bin");

	// run imputation
	ImputationMinimac3Mock imputation = new ImputationMinimac3Mock(configFolder);
	result = run(context, imputation);
	assertTrue(result);

	// run export
	CompressionEncryptionMock export = new CompressionEncryptionMock("files");
	result = run(context, export);
	assertTrue(result);

	ZipFile zipFile = new ZipFile("test-data/tmp/local/chr_X.zip", PASSWORD.toCharArray());
	zipFile.extractAll("test-data/tmp");

	VcfFile vcfFile = VcfFileUtil.load("test-data/tmp/chrX.phased.vcf.gz", 100000000, false);
	
	assertEquals(true, vcfFile.isPhased());
	
	VCFFileReader vcfReader = new VCFFileReader(new File(vcfFile.getVcfFilename()), false);
	
	CloseableIterator<VariantContext> it = vcfReader.iterator();

	while (it.hasNext()) {

		VariantContext line = it.next();

		if (line.getStart() == 44322058) {
			assertEquals("A", line.getGenotype("HG00096").getGenotypeString());
			System.out.println(line.getGenotype("HG00097").getGenotypeString());
			assertEquals("A|A", line.getGenotype("HG00097").getGenotypeString());
		}
	}
	
	vcfReader.close();

	FileUtil.deleteDirectory("test-data/tmp");

}
 
Example 14
Source File: BamToDetailsConversion.java    From chipster with MIT License
/**
 * Find reads in a given range and stream them back in chunks.
 * 
 * <p>
 * TODO add cigar to the list of returned values
 * <p>
 * TODO add pair information to the list of returned values
 * 
 * @param request the data request describing the region and the requested content types
 * @throws InterruptedException 
 */
@Override
protected void processDataRequest(DataRequest request) throws InterruptedException {
	
	// Read the given region
	CloseableIterator<SAMRecord> iterator = dataSource.query(request.start.chr, request.start.bp.intValue(), request.end.bp.intValue());
	
	// Produce results
	while (iterator.hasNext()) {

		List<Feature> responseList = new LinkedList<Feature>();

		// Split results into chunks
		for (int c = 0; c < RESULT_CHUNK_SIZE && iterator.hasNext(); c++) {
			SAMRecord record = iterator.next();

			// Region for this read
			Region recordRegion = new Region((long) record.getAlignmentStart(), (long) record.getAlignmentEnd(), request.start.chr);

			// Values for this read
			LinkedHashMap<DataType, Object> values = new LinkedHashMap<DataType, Object>();

			Feature read = new Feature(recordRegion, values);

			if (request.getRequestedContents().contains(DataType.ID)) {
				values.put(DataType.ID, record.getReadName());
			}

			if (request.getRequestedContents().contains(DataType.STRAND)) {
				values.put(DataType.STRAND, BamUtils.getStrand(record, coverageType));					
			}

			if (request.getRequestedContents().contains(DataType.QUALITY)) {
				values.put(DataType.QUALITY, record.getBaseQualityString());
			}

			if (request.getRequestedContents().contains(DataType.CIGAR)) {
				Cigar cigar = new Cigar(read, record.getCigar());
				values.put(DataType.CIGAR, cigar);
			}

			// TODO Deal with "=" and "N" in read string
			if (request.getRequestedContents().contains(DataType.SEQUENCE)) {
				String seq = record.getReadString();
				values.put(DataType.SEQUENCE, seq);
			}

			if (request.getRequestedContents().contains(DataType.MATE_POSITION)) {
				
				BpCoord mate = new BpCoord((Long)(long)record.getMateAlignmentStart(),
						new Chromosome(record.getMateReferenceName()));
				
				values.put(DataType.MATE_POSITION, mate);
			}
			
			if (request.getRequestedContents().contains(DataType.BAM_TAG_NH)) {
				Object ng = record.getAttribute("NH");
				if (ng != null) {
					values.put(DataType.BAM_TAG_NH, (Integer)record.getAttribute("NH"));
				}
			}
			
			/*
			 * NOTE! RegionContents created from the same read area have to be equal in the methods equals, hashCode and compareTo. Primitive types
			 * should be ok, but objects (including tables) have to be handled in those methods separately. Otherwise tracks keep adding
			 * the same reads to their read sets again and again.
			 */
			responseList.add(read);
		}

		// Send result			
		super.createDataResult(new DataResult(request.getStatus(), responseList));			
	}

	// We are done
	iterator.close();
}
 
Example 15
Source File: pullLargeLengths.java    From HMMRATAC with GNU General Public License v3.0
/**
 * Read the data and create a list of lengths.
 */
private void read() {
	int counter = 0;
	SAMFileReader reader = new SAMFileReader(bam, index);
	ArrayList<Double> temp = new ArrayList<Double>();
	for (int i = 0; i < genome.size(); i++) {
		String chr = genome.get(i).getChrom();
		int start = genome.get(i).getStart();
		int stop = genome.get(i).getStop();
		CloseableIterator<SAMRecord> iter = reader.query(chr, start, stop, false);
		while (iter.hasNext()) {
			SAMRecord record = null;
			try {
				record = iter.next();
			}
			catch (SAMFormatException ex) {
				System.out.println("SAM Record is problematic. Has mapQ != 0 for unmapped read. Will continue anyway");
			}
			if (record != null) {
				if (!record.getReadUnmappedFlag() && !record.getMateUnmappedFlag() && record.getFirstOfPairFlag()) {
					if (record.getMappingQuality() >= minQ) {
						if (Math.abs(record.getInferredInsertSize()) > 100
								&& Math.abs(record.getInferredInsertSize()) < 1000) {
							counter += 1;
							temp.add((double) Math.abs(record.getInferredInsertSize()));
						}
					}
				}
			}
		}
		iter.close();
	}
	reader.close();
	lengths = new double[counter];
	for (int i = 0; i < temp.size(); i++) {
		if (temp.get(i) > 100) {
			lengths[i] = temp.get(i);
		}
	}
}
 
Example 16
Source File: SomaticLocusCaller.java    From abra2 with MIT License
private Counts getCounts(SamReader reader, LocusInfo locus) {

	int depth = 0;
	int altCount = 0;
	int refCount = 0;

	CloseableIterator<SAMRecord> iter = reader.queryOverlapping(locus.chromosome, locus.posStart, locus.posStop);
	while (iter.hasNext()) {
		SAMRecord read = iter.next();
		if (!read.getDuplicateReadFlag()) {
			depth += 1;

			Object[] baseAndQual = getBaseAndQualAtPosition(read, locus.posStart);
			Character base = (Character) baseAndQual[0];
			int baseQual = (Integer) baseAndQual[1];
			Character refBase = c2r.getSequence(locus.chromosome, locus.posStart, 1).charAt(0);

			// Override input with actual reference
			// locus.ref = new String(new char[] { refBase });
			locus.actualRef = new String(new char[] { refBase });

			if (locus.isIndel()) {
				if (hasIndel(read, locus)) {
					altCount += 1;
				} else if (!base.equals('N') && base.equals(refBase)) {
					refCount += 1;
				}
			} else if (baseQual >= minBaseQual) {
				if (!base.equals('N') && base.equals(refBase)) {
					refCount += 1;
				} else if (base.equals(locus.alt.charAt(0))) {
					altCount += 1;
				}
			}
		}
	}

	iter.close();

	return new Counts(refCount, altCount, depth);
}
 
Example 17
Source File: BamToCoverageConversion.java    From chipster with MIT License
private void calculateCoverage(DataRequest request, BpCoord from, BpCoord to) throws InterruptedException {	
			
	//query data for full average bins, because merging them later would be difficult
	long start = CoverageTool.getBin(from.bp);		
	long end = CoverageTool.getBin(to.bp) + CoverageTool.BIN_SIZE - 1;

	//if end coordinate is smaller than 1 Picard returns a full chromosome and we'll run out of memory
	if (end < 1) {
		end = 1;
	}
			
	CloseableIterator<SAMRecord> iterator = dataSource.query(from.chr, (int)start, (int)end);		
	
	BaseStorage forwardBaseStorage = new BaseStorage();
	BaseStorage reverseBaseStorage = new BaseStorage();
	
	while (iterator.hasNext()) {
		
		SAMRecord record = iterator.next();
		
		LinkedHashMap<DataType, Object> values = new LinkedHashMap<DataType, Object>();

		Region recordRegion = new Region((long) record.getAlignmentStart(), (long) record.getAlignmentEnd(), request.start.chr);
		
		Feature read = new Feature(recordRegion, values);

		values.put(DataType.ID, record.getReadName());
		
		values.put(DataType.STRAND, BamUtils.getStrand(record, coverageType));
		
		Cigar cigar = new Cigar(read, record.getCigar());
		values.put(DataType.CIGAR, cigar);
		
		String seq = record.getReadString();
		values.put(DataType.SEQUENCE, seq);
		
		
		// Split read into continuous blocks (elements) by using the cigar
		List<ReadPart> parts = Cigar.splitElements(read);
		
		for (ReadPart part : parts) {				 
			
			if (read.values.get(DataType.STRAND) == Strand.FORWARD) {
				forwardBaseStorage.addNucleotideCounts(part);
			} else if (read.values.get(DataType.STRAND) == Strand.REVERSE) {
				reverseBaseStorage.addNucleotideCounts(part);			
			}
		}						
	}
			
	// We are done
	iterator.close();
			
	/* Reads that overlap query regions create nucleotide counts outside the query region.
	 * Remove those extra nucleotide counts, because they don't contain all reads of those regions and would show
	 * wrong information. 
	 */
	Region filterRegion = new Region(start, end, from.chr);
	forwardBaseStorage.filter(filterRegion);
	reverseBaseStorage.filter(filterRegion);

	// Send result		
	LinkedList<Feature> resultList = new LinkedList<Feature>();					
	
	createResultList(from, forwardBaseStorage, resultList, Strand.FORWARD);
	createResultList(from, reverseBaseStorage, resultList, Strand.REVERSE);		
	
	LinkedList<Feature> averageCoverage = CoverageTool.average(resultList, from.chr);
	
	if (request.getRequestedContents().contains(DataType.COVERAGE)) {		
		super.createDataResult(new DataResult(request, resultList));
	}
	
	if (request.getRequestedContents().contains(DataType.COVERAGE_AVERAGE)) {
		super.createDataResult(new DataResult(request, averageCoverage));
	}
}
 
Example 18
Source File: MergePedIntoVcf.java    From picard with MIT License
/**
 * Writes out the VariantContext objects in the order presented to the supplied output file
 * in VCF format.
 */
private void writeVcf(final CloseableIterator<VariantContext> variants,
                      final File output,
                      final SAMSequenceDictionary dict,
                      final VCFHeader vcfHeader, ZCallPedFile zCallPedFile) {

    try(VariantContextWriter writer = new VariantContextWriterBuilder()
            .setOutputFile(output)
            .setReferenceDictionary(dict)
            .setOptions(VariantContextWriterBuilder.DEFAULT_OPTIONS)
            .build()) {
        writer.writeHeader(vcfHeader);
        while (variants.hasNext()) {
            final VariantContext context = variants.next();

            final VariantContextBuilder builder = new VariantContextBuilder(context);
            if (zCallThresholds.containsKey(context.getID())) {
                final String[] zThresh = zCallThresholds.get(context.getID());
                builder.attribute(InfiniumVcfFields.ZTHRESH_X, zThresh[0]);
                builder.attribute(InfiniumVcfFields.ZTHRESH_Y, zThresh[1]);
            }
            final Genotype originalGenotype = context.getGenotype(0);
            final Map<String, Object> newAttributes = originalGenotype.getExtendedAttributes();
            final VCFEncoder vcfEncoder = new VCFEncoder(vcfHeader, false, false);
            final Map<Allele, String> alleleMap = vcfEncoder.buildAlleleStrings(context);

            final String zCallAlleles = zCallPedFile.getAlleles(context.getID());
            if (zCallAlleles == null) {
                throw new PicardException("No zCall alleles found for snp " + context.getID());
            }
            final List<Allele> zCallPedFileAlleles = buildNewAllelesFromZCall(zCallAlleles, context.getAttributes());
            newAttributes.put(InfiniumVcfFields.GTA, alleleMap.get(originalGenotype.getAllele(0)) + UNPHASED + alleleMap.get(originalGenotype.getAllele(1)));
            newAttributes.put(InfiniumVcfFields.GTZ, alleleMap.get(zCallPedFileAlleles.get(0)) + UNPHASED + alleleMap.get(zCallPedFileAlleles.get(1)));

            final Genotype newGenotype = GenotypeBuilder.create(originalGenotype.getSampleName(), zCallPedFileAlleles,
                    newAttributes);
            builder.genotypes(newGenotype);
            logger.record("0", 0);
            // AC, AF, and AN are recalculated here
            VariantContextUtils.calculateChromosomeCounts(builder, false);
            final VariantContext newContext = builder.make();
            writer.add(newContext);
        }
    }
}
 
Example 19
Source File: SplitVcfs.java    From picard with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final ProgressLogger progress = new ProgressLogger(log, 10000);

    final VCFFileReader fileReader = new VCFFileReader(INPUT, false);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    final SAMSequenceDictionary sequenceDictionary =
            SEQUENCE_DICTIONARY != null
                    ? SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(SEQUENCE_DICTIONARY).getSequenceDictionary()
                    : fileHeader.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(sequenceDictionary)
            .clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);

    final VariantContextWriter snpWriter = builder.setOutputFile(SNP_OUTPUT).build();
    final VariantContextWriter indelWriter = builder.setOutputFile(INDEL_OUTPUT).build();
    snpWriter.writeHeader(fileHeader);
    indelWriter.writeHeader(fileHeader);

    int incorrectVariantCount = 0;

    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        if (context.isIndel()) indelWriter.add(context);
        else if (context.isSNP()) snpWriter.add(context);
        else {
            if (STRICT) throw new IllegalStateException("Found a record with type " + context.getType().name());
            else incorrectVariantCount++;
        }

        progress.record(context.getContig(), context.getStart());
    }

    if (incorrectVariantCount > 0) {
        log.debug("Found " + incorrectVariantCount + " records that didn't match SNP or INDEL");
    }

    CloserUtil.close(iterator);
    CloserUtil.close(fileReader);
    snpWriter.close();
    indelWriter.close();

    return 0;
}
 
Example 20
Source File: BamTagOfTagCounts.java    From Drop-seq with MIT License
public TagOfTagResults<String,String> getResults (final File inputBAM, final String primaryTag, final String secondaryTag, final boolean filterPCRDuplicates, final Integer readQuality) {
	TagOfTagResults<String,String> result = new TagOfTagResults<>();
	SamReader reader = SamReaderFactory.makeDefault().open(inputBAM);
	CloseableIterator<SAMRecord> iter = CustomBAMIterators.getReadsInTagOrder(reader, primaryTag);
	CloserUtil.close(reader);
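	// Note: closing the reader this early is presumably safe because getReadsInTagOrder
	// has already consumed it into a tag-sorted collection backing the iterator.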
	String currentTag="";
	Set<String> otherTagCollection=new HashSet<>();

	ProgressLogger progress = new ProgressLogger(log);

	while (iter.hasNext()) {
		SAMRecord r = iter.next();
		progress.record(r);
		// skip reads that don't pass filters.
		boolean discardResult=(filterPCRDuplicates && r.getDuplicateReadFlag()) || r.getMappingQuality()<readQuality || r.isSecondaryOrSupplementary();
		// short circuit if read is below the quality to be considered.
		if (discardResult) continue;

		Object d = r.getAttribute(secondaryTag);
		// short circuit if there's no tag for this read.
		if (d==null) continue;

		String data=null;

		if (d instanceof String)
			data=r.getStringAttribute(secondaryTag);
		else if (d instanceof Integer)
			data=Integer.toString(r.getIntegerAttribute(secondaryTag));


		String newTag = r.getStringAttribute(primaryTag);
		if (newTag==null)
			newTag="";

		// if you see a new tag.
		if (!currentTag.equals(newTag)) {
			// write out tag results, if any.
			if (!currentTag.equals("") && otherTagCollection.size()>0)
				result.addEntries(currentTag, otherTagCollection);
			currentTag=newTag;
			otherTagCollection.clear();
			if (!discardResult) otherTagCollection=addTagToCollection (data,otherTagCollection);

		} else // gather stats
		if (!discardResult) otherTagCollection=addTagToCollection (data,otherTagCollection);
	}
	if (otherTagCollection.size()>0)
		result.addEntries(currentTag, otherTagCollection);
	iter.close(); // release the tag-sorted iterator's resources
	return result;
}