htsjdk.samtools.util.IOUtil Java Examples

The following examples show how to use htsjdk.samtools.util.IOUtil. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: MultiTileFileUtil.java    From picard with MIT License 6 votes vote down vote up
/**
 * Sets up the per-lane tile index (when its .bci file exists) and locates the single data file
 * in {@code base} whose name matches {@code matchPattern}.
 *
 * @param extension file extension, passed through to the superclass
 * @param base      directory searched for the data file
 * @param bciDir    directory expected to hold {@code s_<lane>.bci}
 * @param fileFaker passed through to the superclass
 * @param lane      lane number, used to build the .bci file name
 * @throws PicardException if more than one file in {@code base} matches the pattern
 */
MultiTileFileUtil(final String extension, final File base, final File bciDir, final FileFaker fileFaker,
                  final int lane) {
    super(false, extension, base, fileFaker, lane);

    // The tile index is optional: it is only loaded when the .bci file is present on disk.
    bci = new File(bciDir, "s_" + lane + ".bci");
    tileIndex = bci.exists() ? new TileIndex(bci) : null;

    // Zero matches means "no data file"; more than one match is ambiguous and rejected.
    final File[] candidates = IOUtil.getFilesMatchingRegexp(base, matchPattern);
    final int candidateCount = (candidates == null) ? 0 : candidates.length;
    if (candidateCount > 1) {
        throw new PicardException("More than one filter file found in " + base.getAbsolutePath());
    }
    dataFile = (candidateCount == 1) ? candidates[0] : null;
}
 
Example #2
Source File: MergeBamAlignmentTest.java    From picard with MIT License 6 votes vote down vote up
/**
 * Merges the "specialHeader" unmapped and aligned SAM fixtures against their reference and checks
 * that the result is a valid SAM identical to the expected fixture.
 */
@Test
public void testMergeHeaderMappedAndReference() throws IOException {
    // Fixture inputs and the expected result all live in the test data directory.
    final File unmapped = new File(TEST_DATA_DIR, "specialHeader.unmapped.sam");
    final File aligned = new File(TEST_DATA_DIR, "specialHeader.aligned.sam");
    final File expected = new File(TEST_DATA_DIR, "specialHeader.expected.sam");
    final File reference = new File(TEST_DATA_DIR, "specialHeader.fasta");

    // The merge output goes to a scratch file that is removed when the JVM exits.
    final File actual = File.createTempFile("merged", ".sam");
    actual.deleteOnExit();

    doMergeAlignment(
            unmapped, Collections.singletonList(aligned),
            null, null, null, null,
            false, true, false, 1,
            "0", "1.0", "align!", "myAligner",
            true, reference, actual,
            null, null, null, null, true, null);

    assertSamValid(actual);
    IOUtil.assertFilesEqual(expected, actual);
}
 
Example #3
Source File: NonNFastaSizeTest.java    From picard with MIT License 6 votes vote down vote up
/**
 * Runs NonNFastaSize without intervals and checks that the single count written to the
 * output file is 1008.
 */
@Test
public void noIntervals() throws IOException {
    final File input = new File(REFERENCE);
    final File outfile = File.createTempFile("nonNcount", ".txt");
    outfile.deleteOnExit();
    final String[] args = new String[] {
            "INPUT="  + input.getAbsolutePath(),
            "OUTPUT=" + outfile.getAbsolutePath()
    };
    Assert.assertEquals(new NonNFastaSize().instanceMain(args), 0);

    // Close the reader deterministically instead of leaking the file handle.
    final String count;
    try (final BufferedReader reader = IOUtil.openFileForBufferedReading(outfile)) {
        count = reader.readLine();
    }

    // An empty or unparsable output must fail the test; previously the resulting
    // NumberFormatException was caught and only printed, so the test passed silently.
    Assert.assertNotNull(count);
    Assert.assertEquals(Long.parseLong(count), 1008);
}
 
Example #4
Source File: IlluminaAdpcFileWriterTest.java    From picard with MIT License 6 votes vote down vote up
/**
 * Writes six fixed records through IlluminaAdpcFileWriter and compares the resulting file
 * with the expected binary fixture.
 */
@Test
public void testWriteIlluminaAdpcFile() throws Exception {
    final File adpcOutput = File.createTempFile("testIlluminaAdpcFileWriter.", ".adpc.bin");
    adpcOutput.deleteOnExit();

    // The writer is closed before the comparison so the file content is complete on disk.
    try (final IlluminaAdpcFileWriter writer = new IlluminaAdpcFileWriter(adpcOutput)) {
        final List<IlluminaAdpcFileWriter.Record> records = new ArrayList<>();
        records.add(new IlluminaAdpcFileWriter.Record(11352, 405, 1.444f, 0.088f, 0.705f, IlluminaGenotype.AA));
        records.add(new IlluminaAdpcFileWriter.Record(458, 2743, 0.043f, 0.852f, 0.818f, IlluminaGenotype.BB));
        records.add(new IlluminaAdpcFileWriter.Record(7548, 303, 1.072f, 0.076f, 0.0f, IlluminaGenotype.NN));
        records.add(new IlluminaAdpcFileWriter.Record(7414, 2158, 0.805f, 0.597f, 0.881f, IlluminaGenotype.AB));
        records.add(new IlluminaAdpcFileWriter.Record(222, 215, 0.0f, 0.0f, 0.91f, IlluminaGenotype.NN));
        // This record passes null for the third and fourth values.
        records.add(new IlluminaAdpcFileWriter.Record(232, 246, null, null, 0.926f, IlluminaGenotype.NN));
        writer.write(records);
    }

    IOUtil.assertFilesEqual(TEST_EXPECTED_ADPC_BIN_FILE, adpcOutput);
}
 
Example #5
Source File: ReferenceSource.java    From cramtools with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the given sequence stream in the local reference cache under its md5, unless a file
 * for that md5 already exists.
 *
 * The data is first written to a temporary file in the destination directory and then renamed
 * into place, so a partially written file never appears under the final cache path.
 *
 * @param md5    checksum used to build the cache path (via REF_CACHE) and the temp file prefix
 * @param stream sequence data to copy into the cache; it is not closed by this method
 * @return the cache path for {@code md5}, whether or not a new file was written
 * @throws RuntimeException wrapping any IOException raised while creating or writing the temp file
 */
private static String addToRefCache(String md5, InputStream stream) {
	String localPath = new PathPattern(REF_CACHE).format(md5);
	File cachedFile = new File(localPath);
	if (!cachedFile.exists()) {
		log.info(String.format("Adding to REF_CACHE sequence md5=%s", md5));
		cachedFile.getParentFile().mkdirs();
		File tmpFile = null;
		try {
			tmpFile = File.createTempFile(md5, ".tmp", cachedFile.getParentFile());
			// try-with-resources closes the output stream even when the copy fails.
			try (FileOutputStream fos = new FileOutputStream(tmpFile)) {
				IOUtil.copyStream(stream, fos);
			}
			// Re-check: the cache entry may have appeared while we were writing.
			if (!cachedFile.exists())
				tmpFile.renameTo(cachedFile);
		} catch (IOException e) {
			throw new RuntimeException(e);
		} finally {
			// After a successful rename the temp path no longer exists; in every other case
			// (copy failure, failed rename, entry already present) remove the leftover.
			if (tmpFile != null && tmpFile.exists())
				tmpFile.delete();
		}
	}
	return localPath;
}
 
Example #6
Source File: MergeSamFilesTest.java    From picard with MIT License 6 votes vote down vote up
/**
 * Confirm that unsorted input can result in coordinate sorted output, with index created.
 */
@Test
public void unsortedInputSortedOutputTest() throws Exception {
    final File unsortedInputTestDataDir = new File(TEST_DATA_DIR, "unsorted_input");
    final File mergedOutput = File.createTempFile("unsortedInputSortedOutputTest.", BamFileIoUtils.BAM_FILE_EXTENSION);
    mergedOutput.deleteOnExit();
    // The index path is the output's basename plus the BAI suffix, next to the BAM itself.
    final File mergedOutputIndex = new File(mergedOutput.getParent(), IOUtil.basename(mergedOutput)+ BAMIndex.BAI_INDEX_SUFFIX);
    mergedOutputIndex.deleteOnExit();

    final String[] args = {
            "I=" + new File(unsortedInputTestDataDir, "1.sam").getAbsolutePath(),
            "I=" + new File(unsortedInputTestDataDir, "2.sam").getAbsolutePath(),
            "O=" + mergedOutput.getAbsolutePath(),
            "CREATE_INDEX=true",
            "SO=coordinate"
    };
    final int mergeExitStatus = runPicardCommandLine(args);
    Assert.assertEquals(mergeExitStatus, 0);

    // try-with-resources releases the reader even when one of the assertions below fails.
    try (final SamReader reader = SamReaderFactory.makeDefault().open(mergedOutput)) {
        Assert.assertEquals(reader.getFileHeader().getSortOrder(), SAMFileHeader.SortOrder.coordinate);
        Assert.assertTrue(reader.hasIndex());
        new ValidateSamTester().assertSamValid(mergedOutput);
        // A successful delete also proves the index file was actually created.
        Assert.assertTrue(mergedOutputIndex.delete());
    }
}
 
Example #7
Source File: SamToFastqTest.java    From picard with MIT License 6 votes vote down vote up
/**
 * Groups the records of a SAM file by read name.
 *
 * @param samFile readable SAM file to load
 * @return map from read name to its MatePair, in order of first appearance in the file
 * @throws IOException if the reader cannot be closed
 */
private Map<String,MatePair> createSamMatePairsMap(final File samFile) throws IOException {
    IOUtil.assertFileIsReadable(samFile);

    final Map<String,MatePair> map = new LinkedHashMap<String,MatePair>();
    // try-with-resources closes the reader even if iteration throws part-way through.
    try (final SamReader reader = SamReaderFactory.makeDefault().open(samFile)) {
        for (final SAMRecord record : reader) {
            MatePair mpair = map.get(record.getReadName());
            if (mpair == null) {
                mpair = new MatePair();
                map.put(record.getReadName(), mpair);
            }
            mpair.add(record);
        }
    }
    return map;
}
 
Example #8
Source File: CheckFingerprint.java    From picard with MIT License 6 votes vote down vote up
/**
 * Validates argument combinations that depend on whether INPUT holds reads or variants.
 *
 * @return an array with a single error message when a combination is invalid or INPUT cannot
 *         be inspected; otherwise the result of the superclass validation
 */
protected String[] customCommandLineValidation() {

    try {
        final boolean fileContainsReads = fileContainsReads(IOUtil.getPath(INPUT));
        if (!fileContainsReads && IGNORE_READ_GROUPS) {
            return new String[]{"The parameter IGNORE_READ_GROUPS can only be used with BAM/SAM/CRAM inputs."};
        }
        if (fileContainsReads && OBSERVED_SAMPLE_ALIAS != null) {
            return new String[]{"The parameter OBSERVED_SAMPLE_ALIAS can only be used with a VCF input."};
        }
    } catch (IOException e) {
        // Report the failure through the validation channel instead of printing a stack trace
        // and carrying on with checks that were never actually performed.
        return new String[]{"Unable to determine whether INPUT (" + INPUT + ") contains reads: " + e.getMessage()};
    }

    if (REFERENCE_SEQUENCE == null && INPUT.endsWith(SamReader.Type.CRAM_TYPE.fileExtension())) {
        return new String[]{"REFERENCE must be provided when using CRAM as input."};
    }

    return super.customCommandLineValidation();
}
 
Example #9
Source File: GtcToVcf.java    From picard with MIT License 6 votes vote down vote up
/**
 * Checks that all input/output files are accessible and that the reference is GRCh37.
 *
 * Side effect: assigns {@code refSeq} from REFERENCE_SEQUENCE.
 *
 * @return an array with a single error message when the reference is unusable or unsupported;
 *         otherwise the result of the superclass validation
 */
@Override
protected String[] customCommandLineValidation() {

    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(EXTENDED_ILLUMINA_MANIFEST);
    IOUtil.assertFileIsReadable(ILLUMINA_BEAD_POOL_MANIFEST_FILE);
    IOUtil.assertFileIsWritable(OUTPUT);
    refSeq = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
    final SAMSequenceDictionary sequenceDictionary = refSeq.getSequenceDictionary();
    // Without a dictionary entry there is no assembly to inspect; report that instead of
    // failing with a NullPointerException.
    if (sequenceDictionary == null || sequenceDictionary.size() == 0) {
        return new String[]{"The reference sequence must have a sequence dictionary with at least one sequence."};
    }
    final String assembly = sequenceDictionary.getSequence(0).getAssembly();
    // Constant-first equals: a sequence with no assembly value is reported as unsupported
    // rather than dereferenced.
    if (!"GRCh37".equals(assembly)) {
        return new String[]{"The selected reference sequence ('" + assembly + "') is not supported.  This tool is currently only implemented to support NCBI Build 37 / HG19 Reference Sequence."};
    }

    // Optional inputs are only checked when they were supplied.
    if (FINGERPRINT_GENOTYPES_VCF_FILE != null) {
        IOUtil.assertFileIsReadable(FINGERPRINT_GENOTYPES_VCF_FILE);
    }
    if (GENDER_GTC != null) {
        IOUtil.assertFileIsReadable(GENDER_GTC);
    }

    return super.customCommandLineValidation();
}
 
Example #10
Source File: BaitDesigner.java    From picard with MIT License 6 votes vote down vote up
/**
 * Method that writes out all the parameter values that were used in the design using reflection.
 *
 * Every non-private declared field whose name is entirely upper case (except USAGE) and whose
 * value is non-null is written as a {@code NAME=value} line.
 *
 * @param file destination file
 * @throws PicardException wrapping any failure while reflecting over fields or writing
 */
void writeParametersFile(final File file) {
    // try-with-resources closes the writer even when reflection or a write fails mid-way.
    try (final BufferedWriter out = IOUtil.openFileForBufferedWriting(file)) {
        for (final Field field : getClass().getDeclaredFields()) {
            if (Modifier.isPrivate(field.getModifiers())) continue;

            final String name = field.getName();

            // Only all-upper-case field names are treated as parameters.
            if (name.toUpperCase().equals(name) && !name.equals("USAGE")) {
                final Object value = field.get(this);

                if (value != null) {
                    out.append(name);
                    out.append("=");
                    out.append(value.toString());
                    out.newLine();
                }
            }
        }
    } catch (Exception e) {
        throw new PicardException("Error writing out parameters file.", e);
    }
}
 
Example #11
Source File: SamBamUtils.java    From chipster with MIT License 6 votes vote down vote up
/**
 * Copies every record of a SAM/BAM file into a new BAM whose header declares coordinate order.
 * The writer is created with presorted = false.
 *
 * Side effect: sets the default SAMFileReader validation stringency to SILENT.
 *
 * @param samBamFile    input SAM or BAM file
 * @param sortedBamFile BAM file to create
 */
public static void sortSamBam(File samBamFile, File sortedBamFile) {

	SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
	SAMFileReader reader = new SAMFileReader(IOUtil.openFileForReading(samBamFile));
	SAMFileWriter writer = null;
	try {
		// Mark the header as coordinate sorted before handing it to the writer.
		reader.getFileHeader().setSortOrder(SAMFileHeader.SortOrder.coordinate);
		writer = new SAMFileWriterFactory().makeBAMWriter(reader.getFileHeader(), false, sortedBamFile);

		for (Iterator<SAMRecord> records = reader.iterator(); records.hasNext(); ) {
			writer.addAlignment(records.next());
		}
	} finally {
		// Reader first, then writer; the writer may still be null if its creation failed.
		closeIfPossible(reader);
		closeIfPossible(writer);
	}
}
 
Example #12
Source File: ParseBarcodeFile.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Parses a tab delimited file with 2 columns and a header.
 * The header is CLUSTER BARCODE.  When a first line is present it must be this header.
 * The two columns are a cluster identifier and a barcode per line of the file.
 * @param input the cluster file to read; must be readable
 * @return A map from each CLUSTER identifier to the set of barcodes related to it.
 * @throws TranscriptomeException if the first line is not the expected header
 */
public static Map<String, Set<String>> readCellClusterFile (final File input) {
	IOUtil.assertFileIsReadable(input);
	Map<String, Set<String>> result = new HashMap<String, Set<String>>();
	BasicInputParser parser = new BasicInputParser(false, 2, input);
	try {
		if (parser.hasNext()) {
			String [] header = parser.next();
			if (!header[0].equals("CLUSTER") || !header[1].equals("BARCODE"))
				throw new TranscriptomeException("The expected header for a cluster file is CLUSTER    BARCODE (tab separaterd)");
		}

		while(parser.hasNext()) {
			String [] line =parser.next();
			String cluster = line[0];
			String barcode = line[1];
			// Create the barcode set the first time a cluster is seen.
			Set<String> c = result.get(cluster);
			if (c==null) {
				c=new HashSet<String>();
				result.put(cluster, c);
			}
			c.add(barcode);
		}
	} finally {
		// Release the underlying file on both the normal and the bad-header path.
		parser.close();
	}
	return result;
}
 
Example #13
Source File: PerTilePerCycleFileUtil.java    From picard with MIT License 6 votes vote down vote up
/**
 * Builds a CycleIlluminaFileMap from the cycle sub-directories found under the lane directory
 * ({@code base}), mapping each cycle to the tiled files in its directory that match
 * {@code matchPattern}.
 * Side effects: adds every discovered cycle to {@code detectedCycles} and, when at least one
 * cycle directory exists, assigns {@code tiles} to the distinct tiles seen across all cycles.
 *
 * @return the populated map, or an empty map when no cycle sub-directory is found
 */
protected CycleIlluminaFileMap getPerTilePerCycleFiles() {
    final CycleIlluminaFileMap filesByCycle = new CycleIlluminaFileMap();

    final File[] cycleDirs = IOUtil.getFilesMatchingRegexp(base, IlluminaFileUtil.CYCLE_SUBDIRECTORY_PATTERN);
    final boolean noCycleDirs = (cycleDirs == null) || (cycleDirs.length == 0);
    if (noCycleDirs) {
        return filesByCycle;
    }

    // First pass: record every cycle number that has a directory.
    for (int i = 0; i < cycleDirs.length; i++) {
        detectedCycles.add(getCycleFromDir(cycleDirs[i]));
    }

    // Second pass: collect the tiled files per cycle and the union of all tile numbers.
    final Set<Integer> tilesSeen = new HashSet<Integer>();
    for (int i = 0; i < cycleDirs.length; i++) {
        final File dir = cycleDirs[i];
        final IlluminaFileMap tiledFiles = getTiledFiles(dir, matchPattern);
        tilesSeen.addAll(tiledFiles.keySet());
        filesByCycle.put(getCycleFromDir(dir), tiledFiles);
    }

    this.tiles = new ArrayList<>(tilesSeen);
    return filesByCycle;
}
 
Example #14
Source File: MarkDuplicatesTest.java    From picard with MIT License 6 votes vote down vote up
/**
 * Runs MarkDuplicates on one SAM file and checks the number of optical duplicates it reports.
 *
 * @param sam                          input SAM supplied by the data provider
 * @param expectedNumOpticalDuplicates optical duplicate count the run must produce
 */
@Test(dataProvider = "testOpticalDuplicateDetectionDataProvider")
public void testOpticalDuplicateDetection(final File sam, final long expectedNumOpticalDuplicates) {
    // All outputs go into a scratch directory that is also used as the tool's TMP_DIR.
    final File scratchDir = IOUtil.createTempDir(TEST_BASE_NAME + ".", ".tmp");
    scratchDir.deleteOnExit();
    final File markedSam = new File(scratchDir, TEST_BASE_NAME + ".sam");
    markedSam.deleteOnExit();
    final File duplicateMetrics = new File(scratchDir, TEST_BASE_NAME + ".duplicate_metrics");
    duplicateMetrics.deleteOnExit();

    // Configure the tool directly through its public fields rather than the command line.
    final MarkDuplicates tool = new MarkDuplicates();
    tool.setupOpticalDuplicateFinder();
    tool.INPUT = CollectionUtil.makeList(sam.getAbsolutePath());
    tool.OUTPUT = markedSam;
    tool.METRICS_FILE = duplicateMetrics;
    tool.TMP_DIR = CollectionUtil.makeList(scratchDir);
    // Needed to suppress calling CommandLineProgram.getVersion(), which doesn't work for code not in a jar
    tool.PROGRAM_RECORD_ID = null;

    Assert.assertEquals(tool.doWork(), 0);
    Assert.assertEquals(tool.numOpticalDuplicates(), expectedNumOpticalDuplicates);

    IOUtil.recursiveDelete(scratchDir.toPath());
}
 
Example #15
Source File: CalculateFingerprintMetrics.java    From picard with MIT License 6 votes vote down vote up
/**
 * Fingerprints every input, merges the fingerprints according to CALCULATE_BY, and writes one
 * metrics row per merged fingerprint to OUTPUT.
 *
 * @return 0 on completion
 */
@Override
protected int doWork() {
    // Fail fast on inaccessible inputs or an unwritable output.
    final List<Path> inputs = IOUtil.getPaths(INPUT);
    IOUtil.assertPathsAreReadable(inputs);
    IOUtil.assertFileIsReadable(HAPLOTYPE_MAP);
    IOUtil.assertFileIsWritable(OUTPUT);

    final FingerprintChecker fingerprintChecker = new FingerprintChecker(HAPLOTYPE_MAP);
    final MetricsFile<FingerprintMetrics, ?> metrics = getMetricsFile();

    final Map<FingerprintIdDetails, Fingerprint> perSource =
            fingerprintChecker.fingerprintFiles(inputs, 1, 1, TimeUnit.DAYS);
    final Map<FingerprintIdDetails, Fingerprint> merged =
            Fingerprint.mergeFingerprintsBy(perSource, Fingerprint.getFingerprintIdDetailsStringFunction(CALCULATE_BY));

    // One metrics row per merged fingerprint.
    final List<FingerprintMetrics> rows = merged.values()
            .stream()
            .map(this::getFingerprintMetrics)
            .collect(Collectors.toList());
    metrics.addAllMetrics(rows);
    metrics.write(OUTPUT);

    return 0;
}
 
Example #16
Source File: CalculateReadGroupChecksum.java    From picard with MIT License 6 votes vote down vote up
/**
 * Computes the read group record checksum of INPUT and writes it to the output file.
 * When OUTPUT is not set, the file is created next to INPUT with a name derived from it.
 *
 * @return 0 on completion
 * @throws PicardException if the checksum cannot be written
 */
@Override
protected int doWork() {
    final File output =
            OUTPUT == null
                    ? new File(INPUT.getParentFile(), getOutputFileName(INPUT))
                    : OUTPUT;

    IOUtil.assertFileIsWritable(output);
    final String hashText = SAMUtils.calculateReadGroupRecordChecksum(INPUT, REFERENCE_SEQUENCE);

    // try-with-resources closes the writer even when write() itself fails.
    try (final FileWriter outputWriter = new FileWriter(output)) {
        outputWriter.write(hashText);
    } catch (final IOException ioe) {
        throw new PicardException(
                "Could not write the computed hash (" + hashText + ") to the output file: " + ioe.getMessage(), ioe);
    }
    return 0;
}
 
Example #17
Source File: IlluminaLaneMetricsCollectorTest.java    From picard with MIT License 6 votes vote down vote up
/**
 * With SILENT validation, a tile lacking phasing/pre-phasing metrics must not stop the run:
 * both metrics files are still produced and must match the canonical files in the run directory.
 * The check is repeated with and without an explicit read structure.
 */
@Test
public void testMissingPhasingValuesSilent() throws IOException {
    final ReadStructure explicitStructure = new ReadStructure("151T8B8B151T");
    for (final boolean withReadStructure : new boolean[] {true, false}) {
        final File runDir = TEST_MISSING_PHASING_DIRECTORY;

        final CollectIlluminaLaneMetrics collector = new CollectIlluminaLaneMetrics();
        collector.OUTPUT_DIRECTORY = IOUtil.createTempDir("illuminaLaneMetricsCollectorTest", null);
        collector.RUN_DIRECTORY = runDir;
        collector.OUTPUT_PREFIX = "test";
        collector.VALIDATION_STRINGENCY = ValidationStringency.SILENT;
        if (withReadStructure) {
            collector.READ_STRUCTURE = explicitStructure;
        }
        collector.doWork();

        // Phasing metrics: produced file versus the canonical one stored with the run.
        final String phasingExt = IlluminaPhasingMetrics.getExtension();
        final File actualPhasing = buildOutputFile(collector.OUTPUT_DIRECTORY, collector.OUTPUT_PREFIX, phasingExt);
        final File expectedPhasing = buildOutputFile(runDir, runDir.getName(), IlluminaPhasingMetrics.getExtension());
        IOUtil.assertFilesEqual(expectedPhasing, actualPhasing);

        // Lane metrics: same comparison.
        final String laneExt = IlluminaLaneMetrics.getExtension();
        final File actualLane = buildOutputFile(collector.OUTPUT_DIRECTORY, collector.OUTPUT_PREFIX, laneExt);
        final File expectedLane = buildOutputFile(runDir, runDir.getName(), IlluminaLaneMetrics.getExtension());
        IOUtil.assertFilesEqual(expectedLane, actualLane);

        IOUtil.deleteDirectoryTree(collector.OUTPUT_DIRECTORY);
    }
}
 
Example #18
Source File: MaskReferenceSequence.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Writes a masked copy of the reference to OUTPUT, processing whole contigs that match
 * CONTIG_PATTERN_TO_IGNORE and/or the regions listed in INTERVALS.
 *
 * @return 0 on completion
 * @throws IllegalStateException if the input fasta has no index
 */
@Override
protected int doWork() {
	IOUtil.assertFileIsReadable(this.REFERENCE_SEQUENCE);
	IOUtil.assertFileIsWritable(this.OUTPUT);
	// validate that an index is present for the reference sequence, since it's required.
	final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE, true, true);
	try {
		if (!ref.isIndexed())
			throw new IllegalStateException ("Input fasta must be indexed.  You can do this by using samtools faidx to create an index");

		FastaSequenceFileWriter writer = new FastaSequenceFileWriter(OUTPUT, OUTPUT_LINE_LENGTH);
		try {
			if (this.CONTIG_PATTERN_TO_IGNORE!=null && !this.CONTIG_PATTERN_TO_IGNORE.isEmpty()) processByWholeContig(ref, writer, this.CONTIG_PATTERN_TO_IGNORE);
			if (this.INTERVALS!=null) processByPartialContig(ref, writer, this.INTERVALS);
		} finally {
			// Close the writer even when processing fails.
			CloserUtil.close(writer);
		}
	} finally {
		// Covers the "not indexed" failure as well as processing errors.
		CloserUtil.close(ref);
	}
	return 0;

}
 
Example #19
Source File: MergeDgeSparse.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Reads cell identifiers from the given files, one per line.
 * Everything from the first '#' on a line is dropped, then only the text before the first
 * whitespace character is kept; lines left empty by either step are skipped.
 *
 * @param files files to read
 * @return the distinct identifiers found across all files
 * @throws RuntimeIOException if a file cannot be read or closed
 */
private Set<String> loadSelectedCellsLists(final List<File> files) {
    final Set<String> ret = new HashSet<>();
    final Pattern comment = Pattern.compile("#");
    final Pattern whitespace = Pattern.compile("\\s");
    for (final File file : files) {
        // try-with-resources closes each reader; previously none of them was ever closed.
        try (final BufferedReader reader = IOUtil.openFileForBufferedReading(file)) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Remove trailing comments
                String[] fields = comment.split(line, 2);
                if (!fields[0].isEmpty()) {
                    // Keep only the text before the first whitespace character
                    fields = whitespace.split(fields[0], 2);
                    if (!fields[0].isEmpty()) {
                        ret.add(fields[0]);
                    }
                }
            }
        } catch (IOException e) {
            throw new RuntimeIOException("Exception reading " + file.getAbsolutePath(), e);
        }
    }
    return ret;
}
 
Example #20
Source File: SamBamUtils.java    From chipster with MIT License 6 votes vote down vote up
/**
 * Creates a BAM index for a coordinate-sorted BAM file.
 *
 * Side effect: sets the default SAMFileReader validation stringency to SILENT.
 *
 * @param bamFile readable BAM file to index
 * @param baiFile index file to create
 * @throws SAMException if the input is not a BAM file or is not sorted by coordinate
 */
public void indexBam(File bamFile, File baiFile) {
	SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);

	// input from a normal file
	IOUtil.assertFileIsReadable(bamFile);
	final SamReader bam = SamReaderFactory.makeDefault().referenceSequence(null)
			.enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS)
			.open(bamFile);

	try {
		if (bam.type() != SamReader.Type.BAM_TYPE) {
			throw new SAMException("Input file must be bam file, not sam file.");
		}

		if (!bam.getFileHeader().getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)) {
			throw new SAMException("Input bam file must be sorted by coordinate");
		}

		BAMIndexer.createIndex(bam, baiFile);
	} finally {
		// Close the reader on the validation-failure paths too, not only after success.
		CloserUtil.close(bam);
	}
}
 
Example #21
Source File: PicardIndexedFastaSequenceFile.java    From chipster with MIT License 6 votes vote down vote up
/**
 * Open the given indexed fasta sequence file.  Throw an exception if the file cannot be opened.
 * @param path The file to open.
 * @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk.
 */
public PicardIndexedFastaSequenceFile(final Path path, final FastaSequenceIndex index) {
    super(path);
    if (index == null) throw new IllegalArgumentException("Null index for fasta " + path);
    this.index = index;
    IOUtil.assertFileIsReadable(path);
    try {
        this.channel = Files.newByteChannel(path);
    } catch (IOException e) {
        throw new SAMException("Fasta file should be readable but is not: " + path, e);
    }
    reset();

    if(getSequenceDictionary() != null)
        sanityCheckDictionaryAgainstIndex(path.toAbsolutePath().toString(),sequenceDictionary,index);
}
 
Example #22
Source File: SetNmMdAndUqTags.java    From picard with MIT License 6 votes vote down vote up
/**
 * Passes every record of the coordinate-sorted INPUT through {@code fixRecord} together with a
 * walker over REFERENCE_SEQUENCE, and writes the records to OUTPUT.
 *
 * @return 0 on completion
 * @throws SAMException if INPUT is not coordinate-sorted
 */
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    try {
        if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            throw new SAMException("Input must be coordinate-sorted for this program to run. Found: " + reader.getFileHeader().getSortOrder());
        }

        final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT);
        try {
            writer.setProgressLogger(
                    new ProgressLogger(log, (int) 1e7, "Wrote", "records"));

            final ReferenceSequenceFileWalker refSeqWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
            try {
                // Fix and write in a single terminal operation; peek() is meant for debugging
                // and should not carry a required side effect.
                StreamSupport.stream(reader.spliterator(), false)
                        .forEach(rec -> {
                            fixRecord(rec, refSeqWalker);
                            writer.addAlignment(rec);
                        });
            } finally {
                // The walker was previously never closed.
                CloserUtil.close(refSeqWalker);
            }
        } finally {
            writer.close();
        }
    } finally {
        // Also covers the sort-order failure above.
        CloserUtil.close(reader);
    }
    return 0;
}
 
Example #23
Source File: CompareDropSeqAlignments.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Writes the contig comparison report: a '#'-prefixed line listing the run parameters, a column
 * header line, then one tab-separated row per ContigResult in the order returned by
 * {@code getKeysOrderedByCount(true)}.
 *
 * @param outFile       report file to create
 * @param contigResults per-contig results together with their read counts
 */
private void writeContigReport (final File outFile, final ObjectCounter<ContigResult> contigResults) {
	// try-with-resources closes the stream even if building a row throws.
	try (PrintStream writer = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(outFile))) {
		List<String> header = new ArrayList<>();
		header.add("INPUT_1="+this.INPUT_1.toString());
		header.add("INPUT_2="+this.INPUT_2.toString());
		header.add("READ_QUALITY="+this.READ_QUALITY);
		header.add("TRIM_CONTIG_STRING="+this.TRIM_CONTIG_STRING);
		String h = StringUtils.join(header, "\t");
		writer.print("#");
		writer.println(h);

		String [] colNames = {"ORIGINAL_CONTIG", "NEW_CONTIGS", "NUM_READS", "MAPPED_UNIQUELY"};
		writer.println(StringUtil.join("\t", colNames));
		List<ContigResult> crList = contigResults.getKeysOrderedByCount(true);
		for (ContigResult cr: crList) {
			String [] body = {cr.getOldContig(), StringUtil.join(",", cr.getNewContigs()), Integer.toString(contigResults.getCountForKey(cr)), Boolean.toString(cr.isNewReadMapsUniquely())};
			writer.println(StringUtil.join("\t", body));
		}
	}
}
 
Example #24
Source File: IlluminaLaneMetricsCollectorTest.java    From picard with MIT License 6 votes vote down vote up
/** Ensures that an exception is thrown when we encounter a tile without phasing/pre-phasing metrics. */
@Test(expectedExceptions = PicardException.class)
public void testMissingPhasingValuesStrict() {
    final ReadStructure readStructure = new ReadStructure("151T8B8B151T");
    for (final boolean useReadStructure : Arrays.asList(true, false)) {
        final File runDirectory = TEST_MISSING_PHASING_DIRECTORY;
        final CollectIlluminaLaneMetrics clp = new CollectIlluminaLaneMetrics();
        clp.OUTPUT_DIRECTORY = IOUtil.createTempDir("illuminaLaneMetricsCollectorTest", null);
        try {
            clp.RUN_DIRECTORY = runDirectory;
            clp.OUTPUT_PREFIX = "test";
            clp.VALIDATION_STRINGENCY = ValidationStringency.STRICT;
            if (useReadStructure) clp.READ_STRUCTURE = readStructure;
            clp.doWork();

            final File phasingMetricsFile = buildOutputFile(clp.OUTPUT_DIRECTORY, clp.OUTPUT_PREFIX, IlluminaPhasingMetrics.getExtension());
            final File canonicalPhasingFile = buildOutputFile(runDirectory, runDirectory.getName(), IlluminaPhasingMetrics.getExtension());
            IOUtil.assertFilesEqual(canonicalPhasingFile, phasingMetricsFile);

            final File laneMetricsFile = buildOutputFile(clp.OUTPUT_DIRECTORY, clp.OUTPUT_PREFIX, IlluminaLaneMetrics.getExtension());
            final File canonicalLaneFile = buildOutputFile(runDirectory, runDirectory.getName(), IlluminaLaneMetrics.getExtension());
            IOUtil.assertFilesEqual(canonicalLaneFile, laneMetricsFile);
        } finally {
            // The expected PicardException would otherwise skip the cleanup and leave the
            // temporary output directory behind on every run.
            IOUtil.deleteDirectoryTree(clp.OUTPUT_DIRECTORY);
        }
    }
}
 
Example #25
Source File: HaplotypeBAMWriterUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Counts the records in a SAM/BAM file, reading with SILENT validation stringency.
 *
 * @param result readable file to count
 * @return number of records iterated
 * @throws IOException if closing the reader fails
 */
private int getReadCounts(final Path result) throws IOException {
    IOUtil.assertFileIsReadable(result);

    // Single-element array so the lambda below can update the tally.
    final int[] tally = {0};
    final SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
    try (final SamReader in = factory.open(result)) {
        in.forEach(rec -> tally[0]++);
    }
    return tally[0];
}
 
Example #26
Source File: SequenceDictionaryUtils.java    From picard with MIT License 5 votes vote down vote up
/**
 * Creates a SortingCollection of Strings, ordered by {@code String::compareTo}, that uses a
 * fresh temporary directory (deleted on JVM exit).
 *
 * @return a new, empty sorting collection
 */
public static SortingCollection<String> makeSortingCollection() {
    final File spillDir = IOUtil.createTempDir("SamDictionaryNames", null);
    spillDir.deleteOnExit();

    // Rough estimate: 256 bytes per name, with a tenth of the maximum heap set aside for names.
    final long maxHeapBytes = Runtime.getRuntime().maxMemory();
    final long nameBudget = maxHeapBytes / 256 / 10;
    // Clamp to the int range expected by newInstance.
    final int maxNamesInRam = (int) Math.min(nameBudget, Integer.MAX_VALUE);

    return SortingCollection.newInstance(
            String.class,
            new StringCodec(),
            String::compareTo,
            maxNamesInRam,
            spillDir.toPath());
}
 
Example #27
Source File: GtcToVcfTest.java    From picard with MIT License 5 votes vote down vote up
/**
 * Loads the test GTC records file, turning each line into one InfiniumGTCRecord.
 *
 * @return the records in file order
 * @throws FileNotFoundException if the records file does not exist
 */
private List<InfiniumGTCRecord> loadInfiniumGTCRecords() throws FileNotFoundException {
    final List<String> rawLines = IOUtil.slurpLines(TEST_GTC_RECORDS_FILE);

    // One record per line, so the result size is known up front.
    final List<InfiniumGTCRecord> records = new ArrayList<>(rawLines.size());
    for (final String rawLine : rawLines) {
        final InfiniumGTCRecord record = new InfiniumGTCRecord(rawLine);
        records.add(record);
    }
    return records;
}
 
Example #28
Source File: ZCallPedFile.java    From picard with MIT License 5 votes vote down vote up
/**
 * Builds a ZCallPedFile from a single-sample .ped file and its matching .map file.
 *
 * @param pedFile  .ped files are whitespace-separated files.
 *                The .ped format is defined at http://zzz.bwh.harvard.edu/plink/data.shtml#ped.
 *                The first six columns, in the following order, are mandatory:
 *                  Family ID
 *                  Individual ID
 *                  Paternal ID
 *                  Maternal ID
 *                  Sex (1 = male, 2 = female, other = unknown)
 *                  Phenotype
 *                The seventh column onward should be biallelic genotype data. ZCall outputs these as A or B,
 *                representing which cluster an allele falls in. Each element of the biallelic pairs should still
 *                be tab-separated.
 *                This file should be a single line, representing a single sample.
 * @param mapFile .map files are whitespace-separated files.
 *                The .map format is defined at http://zzz.bwh.harvard.edu/plink/data.shtml#map.
 *                It has exactly four columns in the following order:
 *                  Chromosome (1-22, X, Y, or 0 if unknown)
 *                  rs# or SNP identifier
 *                  Genetic distance in morgans
 *                  Base-pair position in bp units
 * @return A ZCallPedFile representing the input .ped and .map files.
 * @throws FileNotFoundException if either file does not exist
 * @throws PicardException if the .ped file has more than one line, has too few allele fields for
 *                         the SNPs listed in the .map file, contains an allele that is not a
 *                         single character, or if a .map line has no SNP identifier column
 */
public static ZCallPedFile fromFile(final File pedFile,
                                    final File mapFile) throws FileNotFoundException {
    final String[] pedFileLines = IOUtil.slurpLines(pedFile).toArray(new String[0]);
    if (pedFileLines.length > 1) {
        throw new PicardException("Only single-sample .ped files are supported.");
    }
    final String[] pedFileFields = IOUtil.slurp(pedFile).split("\\s");
    final String[] mapFileLines = IOUtil.slurpLines(mapFile).toArray(new String[0]);

    final ZCallPedFile zCallPedFile = new ZCallPedFile();

    /* first six fields are ignored
        Family ID
        Individual ID
        Paternal ID
        Maternal ID
        Sex (1=male; 2=female; other=unknown)
        Phenotype
     */
    //two fields for each snp (each allele)
    for (int i = 0; i < mapFileLines.length; i++) {
        final int index = (i * 2) + OFFSET;
        // A truncated .ped line is reported as a malformed file instead of surfacing as an
        // ArrayIndexOutOfBoundsException.
        if (index + 1 >= pedFileFields.length) {
            throw new PicardException("Malformed file: " + pedFile + " has " + pedFileFields.length
                    + " fields, which is too few for the " + mapFileLines.length + " SNPs listed in " + mapFile + ".");
        }
        // The fields are supposed to be one character each
        if (pedFileFields[index].length() != 1 || pedFileFields[index + 1].length() != 1) {
            throw new PicardException("Malformed file: each allele should be a single character.");
        }
        final String alleles = pedFileFields[index] + pedFileFields[index + 1];

        // The SNP identifier is the second column of the .map line.
        final String[] mapFields = mapFileLines[i].split("\\s");
        if (mapFields.length < 2) {
            throw new PicardException("Malformed file: line " + (i + 1) + " of " + mapFile
                    + " does not contain a SNP identifier column.");
        }
        zCallPedFile.addAllele(mapFields[1], alleles);
    }
    return zCallPedFile;
}
 
Example #29
Source File: GenotypeSperm.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 * Iterates over the multi-cell digital allele counts for the SNP intervals and the listed cell
 * barcodes, passing each one to {@code processMCDAC} together with the output stream.
 *
 * @return 0 on completion
 */
@Override
protected int doWork() {
	// validation
	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);
	IOUtil.assertFileIsReadable(this.CELL_BC_FILE);

	List<String> cellBarcodes = ParseBarcodeFile.readCellBarcodeFile(this.CELL_BC_FILE);
	log.info("Using " + cellBarcodes.size() + " cells in analysis");
	IntervalList snpIntervals = IntervalList.fromFile(INTERVALS);
	log.info("Using " + snpIntervals.getIntervals().size() + " SNP intervals in analysis");
	PrintStream out = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(OUTPUT));
	try {
		writeHeader(out);

		SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(this.INPUT);

		SNPUMIBasePileupIterator sbpi = getIter(reader, snpIntervals, cellBarcodes);

		MultiCellDigitalAlleleCountsIterator multiIter = new MultiCellDigitalAlleleCountsIterator(new DigitalAlleleCountsIterator(sbpi, BASE_QUALITY));
		try {
			// sort cell barcodes alphabetically for output.
			Collections.sort(cellBarcodes);
			int counter=0;
			while (multiIter.hasNext()) {
				MultiCellDigitalAlleleCounts mcdac = multiIter.next();
				processMCDAC(cellBarcodes, mcdac, out, AUTO_FLUSH_OUTPUTS);
				counter++;
				if (counter%PROGRESS_RATE==0) log.info("Processed " + counter + " SNPs");
			}
			log.info("Processed " + counter +" total SNPs");
		} finally {
			// Close the iterator even when processing a SNP fails.
			multiIter.close();
		}
	} finally {
		// Close the output stream on every path, including failures while setting up the iterators.
		out.close();
	}
	return 0;
}
 
Example #30
Source File: NewIlluminaBasecallsConverter.java    From picard with MIT License 5 votes vote down vote up
/**
 * Creates a sorting collection for cluster output records that uses the configured temporary
 * directories.
 *
 * @return a new, empty sorting collection
 */
private synchronized SortingCollection<CLUSTER_OUTPUT_RECORD> newSortingCollection() {
    // Divide the per-tile RAM budget across the barcode writers, keeping at least one record.
    final int writerCount = barcodeRecordWriterMap.size();
    final int perWriterBudget = maxReadsInRamPerTile / writerCount;
    final int maxRecordsInRam = Math.max(1, perWriterBudget);

    return SortingCollection.newInstanceFromPaths(
            outputRecordClass,
            codecPrototype.clone(),
            outputRecordComparator,
            maxRecordsInRam,
            IOUtil.filesToPaths(tmpDirs));
}