htsjdk.samtools.util.StringUtil Java Examples

The following examples show how to use htsjdk.samtools.util.StringUtil. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CollapseTagWithContext.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Writes one tab-delimited line per metric in the mapping result to the given stream.
 *
 * Columns, in order: context, barcode, number of merged barcodes plus one, edit distance discovered,
 * edit distance used, original observations, total observations. When requested, a final column holds
 * the comma-separated edit distance list, or "NA" when that list is empty.
 *
 * @param writeEditDistanceDistribution if true, append the edit distance list column
 * @param context the value written in the first column of every line
 * @param r the mapping result whose metrics are written
 * @param out the destination stream; when null nothing is written
 */
private void writeMetrics (final boolean writeEditDistanceDistribution, final String context, final AdaptiveMappingResult r, final PrintStream out) {
	if (out==null) {
		return;
	}

	for (final EditDistanceMappingMetric edmm: r.getMetricResult()) {
		// The reported number of barcodes includes the one that everything is merged into, hence the +1.
		final List<String> line = new ArrayList<>(Arrays.asList(
				context,
				edmm.getBarcode(),
				Integer.toString(edmm.getNumMergedBarcodes()+1),
				Integer.toString(edmm.getEditDistanceDiscovered()),
				Integer.toString(edmm.getEditDistanceUsed()),
				Integer.toString(edmm.getOriginalObservations()),
				Integer.toString(edmm.getTotalObservations())));

		if (writeEditDistanceDistribution) {
			final int [] edList = edmm.getEdList();
			if (edList.length>0) {
				// Box the primitives so the values can be handed to the generic join.
				final Integer[] boxed = Arrays.stream( edList ).boxed().toArray( Integer[]::new );
				line.add(StringUtil.join(",", boxed));
			} else {
				line.add("NA");
			}
		}
		out.println(StringUtil.join("\t", line));
	}
}
 
Example #2
Source File: SNPUMIBasePileupTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Supplies two bases but three qualities and expects setBasesAndQualities to
 * reject the mismatch with an IllegalArgumentException.
 */
@Test(enabled=true)
public void testAddBaseQualsError () {
	final int position = 76227022;
	final Interval interval = new Interval("HUMAN_1", position, position, true, "test");
	final SNPUMIBasePileup pileup = new SNPUMIBasePileup(interval, "ACADM", "fake_cell", "AAAAA");

	// The array lengths deliberately disagree: 2 bases versus 3 qualities.
	final char [] baseChars = {'A', 'A'};
	final byte [] qualities = {27,17,55};
	final byte [] baseBytes = new byte [baseChars.length];
	StringUtil.charsToBytes(baseChars, 0, baseChars.length, baseBytes, 0);

	boolean threw;
	try {
		pileup.setBasesAndQualities(baseBytes, qualities);
		threw = false;
	} catch (IllegalArgumentException expected) {
		Assert.assertNotNull(expected);
		threw = true;
	}
	Assert.assertTrue(threw);
}
 
Example #3
Source File: TrimSequenceTemplate.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Test to see if this read matches this barcode. Barcode positions for which
 * isIgnoreBase is true (per the original documentation, N or n) are skipped.
 *
 * @param testString
 *            The read to look for this barcode in. The barcode should be at
 *            the start of the read for this method.  The entire barcode is expected for a match.
 * @return true if every non-ignored barcode position equals the base at the same
 *         position of the read; false on any mismatch, or when the read ends
 *         before a non-ignored barcode position.
 */
public boolean hasForwardMatch(final String testString) {
	final byte[] testBases = StringUtil.stringToBytes(testString);
	for (int i = 0; i < this.bases.length; i++) {
		if (isIgnoreBase(this.bases[i])) {
			continue;
		}
		// A read that stops short of a required barcode position cannot hold the
		// entire barcode; report a non-match instead of indexing past the array.
		if (i >= testBases.length) {
			return false;
		}
		if (!SequenceUtil.basesEqual(testBases[i], this.bases[i])) {
			return false;
		}
	}
	return true;
}
 
Example #4
Source File: SNPUMIBasePileupTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Supplies two bases and two qualities and expects setBasesAndQualities to
 * accept them without throwing an IllegalArgumentException.
 */
@Test(enabled=true)
public void testAddBaseQuals () {
	final int position = 76227022;
	final Interval interval = new Interval("HUMAN_1", position, position, true, "test");
	final SNPUMIBasePileup pileup = new SNPUMIBasePileup(interval, "ACADM", "fake_cell", "AAAAA");

	final char [] baseChars = {'A', 'A'};
	final byte [] qualities = {27,55};
	final byte [] baseBytes = new byte [baseChars.length];
	StringUtil.charsToBytes(baseChars, 0, baseChars.length, baseBytes, 0);

	boolean accepted;
	try {
		pileup.setBasesAndQualities(baseBytes, qualities);
		accepted = true;
	} catch (IllegalArgumentException rejected) {
		accepted = false;
	}
	Assert.assertTrue(accepted);
}
 
Example #5
Source File: ContextAccumulator.java    From picard with MIT License 6 votes vote down vote up
/**
 * Fills this (zero-context) accumulator by merging in the counts of a full-context accumulator.
 *
 * For every key of the full accumulator, the target key is the base at index contextSize
 * surrounded on both sides by contextSize 'N' characters; each source accumulator is merged
 * into the accumulator at the same array index under that target key.
 *
 * @param fullContextAccumulator the accumulator whose entries are merged into this one
 * @param contextSize number of padding characters on each side, and index of the central base
 */
public void fillZeroRecords(final ContextAccumulator fullContextAccumulator, final int contextSize) {
    final String flank = StringUtil.repeatCharNTimes('N', contextSize);

    for (final Map.Entry<String, AlignmentAccumulator[]> entry : fullContextAccumulator.artifactMap.entrySet()) {
        final String sourceKey = entry.getKey();
        final String zeroKey = flank + sourceKey.charAt(contextSize) + flank;

        final AlignmentAccumulator[] targets = this.artifactMap.get(zeroKey);
        final AlignmentAccumulator[] sources = entry.getValue();

        int idx = 0;
        while (idx < sources.length) {
            targets[idx].merge(sources[idx]);
            idx++;
        }
    }
}
 
Example #6
Source File: CreateSequenceDictionary.java    From varsim with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Builds a SAMSequenceRecord (name, length, MD5, URI and the optional assembly and
 * species attributes) from a single fasta sequence.
 *
 * Note that the byte array returned by refSeq.getBases() is upper-cased in place
 * before the MD5 is computed.
 */
private SAMSequenceRecord makeSequenceRecord(final ReferenceSequence refSeq) {
  final SAMSequenceRecord record = new SAMSequenceRecord(refSeq.getName(), refSeq.length());

  // The MD5 is taken over the upper-cased bases.
  final byte[] sequence = refSeq.getBases();
  int pos = 0;
  while (pos < sequence.length) {
    sequence[pos] = StringUtil.toUpperCase(sequence[pos]);
    pos++;
  }
  record.setAttribute(SAMSequenceRecord.MD5_TAG, md5Hash(sequence));

  // Assembly and species are optional; the URI is always written.
  if (GENOME_ASSEMBLY != null) {
    record.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, GENOME_ASSEMBLY);
  }
  record.setAttribute(SAMSequenceRecord.URI_TAG, URI);
  if (SPECIES != null) {
    record.setAttribute(SAMSequenceRecord.SPECIES_TAG, SPECIES);
  }

  return record;
}
 
Example #7
Source File: CollectRnaSeqMetricsTest.java    From picard with MIT License 6 votes vote down vote up
/**
 * Creates a temporary refFlat file with a single gene containing two exons, one of which is
 * overlapped by the ribosomal interval. The file is marked for deletion on JVM exit.
 *
 * @param sequence the value written in the chromosome column
 * @return the temporary refFlat file holding one tab-separated record
 * @throws Exception if the temporary file cannot be created or opened
 */
public File getRefFlatFile(String sequence) throws Exception {
    final String[] refFlatFields = new String[RefFlatColumns.values().length];
    refFlatFields[RefFlatColumns.GENE_NAME.ordinal()] = "myGene";
    refFlatFields[RefFlatColumns.TRANSCRIPT_NAME.ordinal()] = "myTranscript";
    refFlatFields[RefFlatColumns.CHROMOSOME.ordinal()] = sequence;
    refFlatFields[RefFlatColumns.STRAND.ordinal()] = "+";
    refFlatFields[RefFlatColumns.TX_START.ordinal()] = "49";
    refFlatFields[RefFlatColumns.TX_END.ordinal()] = "500";
    refFlatFields[RefFlatColumns.CDS_START.ordinal()] = "74";
    refFlatFields[RefFlatColumns.CDS_END.ordinal()] = "400";
    refFlatFields[RefFlatColumns.EXON_COUNT.ordinal()] = "2";
    refFlatFields[RefFlatColumns.EXON_STARTS.ordinal()] = "49,249";
    refFlatFields[RefFlatColumns.EXON_ENDS.ordinal()] = "200,500";

    final File refFlatFile = File.createTempFile("tmp.", ".refFlat");
    refFlatFile.deleteOnExit();

    // try-with-resources guarantees the stream is closed even if writing fails.
    try (PrintStream refFlatStream = new PrintStream(refFlatFile)) {
        refFlatStream.println(StringUtil.join("\t", refFlatFields));
    }

    return refFlatFile;
}
 
Example #8
Source File: IlluminaDataProviderFactory.java    From picard with MIT License 6 votes vote down vote up
/**
 * Call this method to create a ClusterData iterator over the specified tiles.
 *
 * @param requestedTiles the tiles to read, or null to use all available tiles; must not be empty
 * @return An iterator for reading the Illumina basecall output for the lane specified in the constructor.
 * @throws PicardException if an empty tile list is supplied
 */
public BaseIlluminaDataProvider makeDataProvider(List<Integer> requestedTiles) {
    if (requestedTiles == null) {
        requestedTiles = availableTiles;
    } else if (requestedTiles.isEmpty()) {
        throw new PicardException("Zero length tile list supplied to makeDataProvider, you must specify at least 1 tile OR pass NULL to use all available tiles");
    }

    // One parser per supported format, each mapped to the data types it provides.
    final Map<IlluminaParser, Set<IlluminaDataType>> parsersToDataType = new HashMap<>();
    for (final Map.Entry<SupportedIlluminaFormat, Set<IlluminaDataType>> fmToDt : formatToDataTypes.entrySet()) {
        parsersToDataType.put(makeParser(fmToDt.getKey(), requestedTiles), fmToDt.getValue());
    }

    // Separator and collection are two distinct arguments; concatenating them into a single
    // string left join() with nothing to join, so the parsers were never listed.
    log.debug("The following parsers will be used by IlluminaDataProvider: " + StringUtil.join(",", parsersToDataType.keySet()));

    return new IlluminaDataProvider(outputMapping, parsersToDataType, basecallDirectory, lane);
}
 
Example #9
Source File: LikelihoodUtilsTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Computes the mixed-model log likelihood for two 'A' reads of quality 10 against three
 * genotypes (HOM_REF, HET, HOM_VAR) mixed 2:1:1, and expects log10(0.36) within 0.001.
 */
@Test(enabled=true)
public void testMixedLikelihoodMultiRead () {
	final List<GenotypeType> genotypes = Arrays.asList(GenotypeType.HOM_REF, GenotypeType.HET, GenotypeType.HOM_VAR);

	// Boxing replaces the deprecated Double/Byte constructors.
	final List<Double> mixture = Arrays.asList(2.0, 1.0, 1.0);

	final char refAllele ='A';
	final char altAllele ='T';

	final List<Byte> bases = Arrays.asList(StringUtil.charToByte('A'), StringUtil.charToByte('A'));
	final List<Byte> qualities = Arrays.asList((byte) 10, (byte) 10);

	final double result = LikelihoodUtils.getInstance().getLogLikelihoodMixedModel(refAllele, altAllele, genotypes, mixture, bases, qualities, null, null, null);
	Assert.assertEquals(result, Math.log10(0.36), 0.001);
}
 
Example #10
Source File: IlluminaBasecallsToSam.java    From picard with MIT License 6 votes vote down vote up
/**
 * Verifies that every expected column is present and returns the columns that are left over.
 *
 * @param actualCols   The columns present in the LIBRARY_PARAMS file
 * @param expectedCols The columns that are REQUIRED
 * @return a new set holding actualCols minus expectedCols
 * @throws PicardException if any expected column is absent from actualCols
 */
private Set<String> findAndFilterExpectedColumns(final Set<String> actualCols, final Set<String> expectedCols) {
    // Anything expected but not actually present is an error.
    final Set<String> absent = new HashSet<>(expectedCols);
    absent.removeAll(actualCols);
    if (!absent.isEmpty()) {
        final String message = String.format(
                "LIBRARY_PARAMS file %s is missing the following columns: %s.",
                LIBRARY_PARAMS.getAbsolutePath(),
                StringUtil.join(", ", absent));
        throw new PicardException(message);
    }

    // What remains once the required columns are removed.
    final Set<String> extras = new HashSet<>(actualCols);
    extras.removeAll(expectedCols);
    return extras;
}
 
Example #11
Source File: IlluminaBasecallsToSam.java    From picard with MIT License 6 votes vote down vote up
/**
 * Given a set of columns assert that none of them is one of the header tags built by default in
 * buildSamHeaderParameters, and that none is longer than an RG header attribute (2 letters).
 *
 * @param rgTagColumns A set of columns that should conform to the rg header attribute format
 * @throws PicardException if a column collides with a default header tag or is longer than 2 characters
 */
private void checkRgTagColumns(final Set<String> rgTagColumns) {
    // Work on a copy: retainAll on the live keySet() view would also remove entries from the map itself.
    final Set<String> forbiddenHeaders = new HashSet<>(buildSamHeaderParameters(null).keySet());
    forbiddenHeaders.retainAll(rgTagColumns);

    if (!forbiddenHeaders.isEmpty()) {
        throw new PicardException("Illegal ReadGroup tags in library params(barcode params) file(" + LIBRARY_PARAMS.getAbsolutePath() + ") Offending headers = " + StringUtil.join(", ", forbiddenHeaders));
    }

    // NOTE(review): only labels longer than 2 characters are rejected; shorter labels pass — confirm that is intended.
    for (final String column : rgTagColumns) {
        if (column.length() > 2) {
            throw new PicardException("Column label (" + column + ") unrecognized.  Library params(barcode params) can only contain the columns " +
                    "(OUTPUT, LIBRARY_NAME, SAMPLE_ALIAS, BARCODE, BARCODE_<X> where X is a positive integer) OR two letter RG tags!");
        }
    }
}
 
Example #12
Source File: CheckIlluminaDirectory.java    From picard with MIT License 6 votes vote down vote up
/**
 * Asserts that the basecalls directory is readable and checks that no lane is below 1.
 *
 * @return null when validation passes, otherwise an array with the error message
 */
@Override
protected String[] customCommandLineValidation() {
    IOUtil.assertDirectoryIsReadable(BASECALLS_DIR);

    final List<String> problems = new ArrayList<>();
    for (final Integer laneNumber : LANES) {
        if (laneNumber >= 1) {
            continue;
        }
        // One message covers all lanes, so stop at the first offender.
        problems.add(
                "LANES must be greater than or equal to 1.  LANES passed in " + StringUtil.join(", ", LANES));
        break;
    }

    return problems.isEmpty() ? null : problems.toArray(new String[problems.size()]);
}
 
Example #13
Source File: IlluminaBasecallsToFastqTest.java    From picard with MIT License 6 votes vote down vote up
/**
 * Copies a library params file from the test data directory to libraryParams, appending an
 * OUTPUT_PREFIX column. For each data line the prefix is a file in outputDir named by the
 * concatenation of the first concatNColumnFields tab-separated fields; every prefix is also
 * added to outputPrefixes.
 *
 * @param libraryParamsFile   name of the input file inside testDataDir
 * @param concatNColumnFields number of leading fields concatenated into the output prefix name
 * @param testDataDir         directory holding the input file
 * @param outputDir           directory under which the output prefixes are placed
 * @param libraryParams       the file to write
 * @param outputPrefixes      receives one output prefix per data line
 * @throws FileNotFoundException if the input or output file cannot be opened
 */
private void convertParamsFile(String libraryParamsFile, int concatNColumnFields, File testDataDir, File outputDir, File libraryParams, List<File> outputPrefixes) throws FileNotFoundException {
    // Both resources are managed by the try so the writer is closed even when reading fails part-way.
    try (LineReader reader = new BufferedLineReader(new FileInputStream(new File(testDataDir, libraryParamsFile)));
         PrintWriter writer = new PrintWriter(libraryParams)) {
        final String header = reader.readLine();
        writer.println(header + "\tOUTPUT_PREFIX");

        String line;
        while ((line = reader.readLine()) != null) {
            final String[] fields = line.split("\t");
            final File outputPrefix = new File(outputDir, StringUtil.join("", Arrays.copyOfRange(fields, 0, concatNColumnFields)));
            outputPrefixes.add(outputPrefix);
            writer.println(line + "\t" + outputPrefix);
        }
    }
}
 
Example #14
Source File: Test.java    From picard with MIT License 6 votes vote down vote up
/**
 * Times one million calls of StringUtil.split against one million calls of the local
 * split(String, String[], String) on TEXT, and prints the elapsed time of each.
 */
public void run() {
    final int iterations = 1000000;
    final String[] tokens = new String[10000];
    final StopWatch timer = new StopWatch();

    // First pass: StringUtil.split with a char delimiter.
    timer.start();
    int pass = 0;
    while (pass < iterations) {
        final int found = StringUtil.split(TEXT, tokens, '\t');
        if (found > 100) {
            System.out.println("Mama Mia that's a lot of tokens!!");
        }
        ++pass;
    }
    timer.stop();
    System.out.println("StringUtil.split() took " + timer.getElapsedTime());
    timer.reset();

    // Second pass: the local split with a String delimiter.
    timer.start();
    pass = 0;
    while (pass < iterations) {
        final int found = split(TEXT, tokens, "\t");
        if (found > 100) {
            System.out.println("Mama Mia that's a lot of tokens!!");
        }
        ++pass;
    }
    timer.stop();
    System.out.println("StringTokenizer took " + timer.getElapsedTime());
}
 
Example #15
Source File: CollapseTagWithContextTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Returns a copy of baseString in which numChanges distinct, randomly chosen positions
 * have each been replaced by the result of alterBase.
 *
 * @param baseString the sequence to alter
 * @param numChanges how many distinct positions to change
 * @return the altered sequence
 * @throws IllegalArgumentException if numChanges exceeds the length of baseString
 */
private final String alterBaseString(final String baseString, final int numChanges) {
    final byte[] sequence = StringUtil.stringToBytes(baseString);
    if (numChanges > baseString.length()) {
        throw new IllegalArgumentException("Too many changes requested");
    }

    final Set<Integer> used = new HashSet<>();
    for (int done = 0; done < numChanges; done++) {
        // Re-draw until a position that has not been altered yet comes up.
        int candidate;
        do {
            candidate = random.nextInt(sequence.length);
        } while (!used.add(candidate));
        sequence[candidate] = alterBase(sequence[candidate]);
    }
    return StringUtil.bytesToString(sequence);
}
 
Example #16
Source File: ContextAccumulator.java    From picard with MIT License 6 votes vote down vote up
/**
 * Fills this (half-context) accumulator by merging in the counts of a full-context accumulator.
 *
 * Each full-context key yields two target keys: a leading key (the first contextSize characters,
 * the central base, then 'N' padding) and a trailing key ('N' padding, the central base, then
 * the characters after it). Every source accumulator is merged into the accumulators at the
 * same array index under both target keys.
 *
 * @param fullContextAccumulator the accumulator whose entries are merged into this one
 * @param contextSize number of characters on each side of the central base
 */
public void fillHalfRecords(final ContextAccumulator fullContextAccumulator, final int contextSize) {
    final String flank = StringUtil.repeatCharNTimes('N', contextSize);

    for (final Map.Entry<String, AlignmentAccumulator[]> entry : fullContextAccumulator.artifactMap.entrySet()) {
        final String key = entry.getKey();
        final char center = key.charAt(contextSize);
        final String leadingKey = key.substring(0, contextSize) + center + flank;
        final String trailingKey = flank + center + key.substring(contextSize + 1);

        final AlignmentAccumulator[] trailing = this.artifactMap.get(trailingKey);
        final AlignmentAccumulator[] leading = this.artifactMap.get(leadingKey);
        final AlignmentAccumulator[] full = entry.getValue();

        int idx = 0;
        while (idx < full.length) {
            trailing[idx].merge(full[idx]);
            leading[idx].merge(full[idx]);
            idx++;
        }
    }
}
 
Example #17
Source File: FastqToSam.java    From picard with MIT License 6 votes vote down vote up
/**
 * Builds an unmapped SAMRecord from a fastq record.
 *
 * @param header   the header the new record is created with
 * @param baseName the read name to assign
 * @param frec     source of the read string, quality string and read header
 * @param paired   if true the record is flagged as paired with an unmapped mate
 * @return the populated record
 * @throws PicardException if a converted base quality lies outside MIN_Q..MAX_Q
 */
private SAMRecord createSamRecord(final SAMFileHeader header, final String baseName, final FastqRecord frec, final boolean paired) {
    final SAMRecord record = new SAMRecord(header);
    record.setReadName(baseName);
    record.setReadString(frec.getReadString());
    record.setReadUnmappedFlag(true);
    record.setAttribute(ReservedTagConstants.READ_GROUP_ID, READ_GROUP_NAME);

    final byte[] qualities = StringUtil.stringToBytes(frec.getBaseQualityString());
    convertQuality(qualities, QUALITY_FORMAT);
    for (int i = 0; i < qualities.length; i++) {
        // Range-check each quality as an unsigned value.
        final int unsignedQuality = qualities[i] & 0xff;
        final boolean inRange = unsignedQuality >= MIN_Q && unsignedQuality <= MAX_Q;
        if (!inRange) {
            throw new PicardException("Base quality " + unsignedQuality + " is not in the range " + MIN_Q + ".." +
            MAX_Q + " for read " + frec.getReadHeader());
        }
    }
    record.setBaseQualities(qualities);

    if (paired) {
        record.setReadPairedFlag(true);
        record.setMateUnmappedFlag(true);
    }
    return record;
}
 
Example #18
Source File: FastqToSam.java    From picard with MIT License 6 votes vote down vote up
/** Returns read baseName and asserts correct pair read name format:
 * <ul>
 * <li> Paired reads must either have the exact same read names or they must contain at least one "/"
 * <li> and the First pair read name must end with "/1" and second pair read name ends with "/2"
 * <li> The baseName (read name part before the /) must be the same for both read names
 * <li> If the read names are exactly the same but end in "/2" or "/1" then an exception will be thrown
 * </ul>
 *
 * @param readName1 read name of the first read of the pair
 * @param readName2 read name of the second read of the pair
 * @param freader1  reader the first read came from, used for error reporting
 * @param freader2  reader the second read came from, used for error reporting
 * @return the base name shared by both reads
 * @throws PicardException if the base names differ, if exactly one pair number is present,
 *                         or if the pair numbers are not "1" and "2" respectively
 */
String getBaseName(final String readName1, final String readName2, final FastqReader freader1, final FastqReader freader2) {
    final String[] toks1 = getReadNameTokens(readName1, 1, freader1);
    final String baseName1 = toks1[0];
    final String num1 = toks1[1];

    final String[] toks2 = getReadNameTokens(readName2, 2, freader2);
    final String baseName2 = toks2[0];
    final String num2 = toks2[1];

    if (!baseName1.equals(baseName2)) {
        throw new PicardException(String.format("In paired mode, read name 1 (%s) does not match read name 2 (%s)", baseName1,baseName2));
    }

    final boolean num1Blank = StringUtil.isBlank(num1);
    final boolean num2Blank = StringUtil.isBlank(num2);

    // Exactly one pair number present: report the read whose number is actually missing.
    if (num1Blank != num2Blank) {
        if (num2Blank) {
            throw new PicardException(error(freader2, "Pair 2 number is missing (" +readName2+ "). Both pair numbers must be present or neither."));
        }
        throw new PicardException(error(freader1,"Pair 1 number is missing (" +readName1+ "). Both pair numbers must be present or neither."));
    }

    // Both present: they must be 1 and 2 respectively. Both blank is accepted as-is.
    if (!num1Blank) {
        if (!num1.equals("1")) throw new PicardException(error(freader1,"Pair 1 number must be 1 ("+readName1+")"));
        if (!num2.equals("2")) throw new PicardException(error(freader2,"Pair 2 number must be 2 ("+readName2+")"));
    }

    return baseName1;
}
 
Example #19
Source File: MeanQualityByCycleSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Saves the metrics and, when histograms exist and a chart output is configured, runs the
 * mean-quality-by-cycle R script to produce a chart.
 *
 * @param metrics       the metrics file to save
 * @param readsHeader   header whose read groups determine the plot subtitle
 * @param inputFileName passed through to the R script
 */
private void saveResults(final MetricsFile<?, Integer> metrics, final SAMFileHeader readsHeader, final String inputFileName){
    MetricsUtils.saveMetrics(metrics, out);

    if (metrics.getAllHistograms().isEmpty()) {
        logger.warn("No valid bases found in input file.");
        return;
    }
    if (chartOutput == null) {
        return;
    }

    // With exactly one read group, its library name (or "" if null) becomes the plot subtitle;
    // otherwise the subtitle stays empty.
    final List<SAMReadGroupRecord> groups = readsHeader.getReadGroups();
    final String plotSubtitle = groups.size() == 1
            ? StringUtil.asEmptyIfNull(groups.get(0).getLibrary())
            : "";

    // Run R to generate the chart.
    final RScriptExecutor rRunner = new RScriptExecutor();
    rRunner.addScript(getMeanQualityByCycleRScriptResource());
    rRunner.addArgs(out, chartOutput.getAbsolutePath(), inputFileName, plotSubtitle);
    rRunner.exec();
}
 
Example #20
Source File: IlluminaBasecallsToFastq.java    From picard with MIT License 5 votes vote down vote up
/**
 * Fills recs with one FastqRecord per entry of indices, in the same order.
 *
 * For each entry the read at that index of the cluster is fetched, its bases are converted to a
 * string with every '.' replaced by 'N', and its qualities are converted with SAMUtils.phredToFastq.
 *
 * @param recs                   output array; element i receives the record for indices[i]
 * @param indices                indices of the reads to fetch from the cluster
 * @param cluster                the cluster the reads are taken from
 * @param appendReadNumberSuffix if true the 1-based position within indices is passed to the read
 *                               name encoder, otherwise null is passed
 */
private void makeFastqRecords(final FastqRecord[] recs, final int[] indices,
                              final ClusterData cluster, final boolean appendReadNumberSuffix) {
    // An int counter cannot overflow before reaching indices.length, unlike the former short counter.
    for (int i = 0; i < indices.length; ++i) {
        final ReadData readData = cluster.getRead(indices[i]);
        final String readBases = StringUtil.bytesToString(readData.getBases()).replace('.', 'N');
        final String readName = readNameEncoder.generateReadName(cluster, appendReadNumberSuffix ? i + 1 : null);
        recs[i] = new FastqRecord(
                readName,
                readBases,
                null,
                SAMUtils.phredToFastq(readData.getQualities())
        );
    }
}
 
Example #21
Source File: IlluminaBasecallsToFastq.java    From picard with MIT License 5 votes vote down vote up
/**
 * For each line in the MULTIPLEX_PARAMS file create a FastqRecordsWriter and put it in the sampleBarcodeFastqWriterMap map,
 * where the key to the map is the concatenation of all sampleBarcodes in order for the given line.
 * The key is null when there are no barcode columns or when any barcode value of the line is "N".
 *
 * @throws PicardException if a key appears more than once or the file has no data rows
 */
private void populateWritersFromMultiplexParams() {
    final TabbedTextFileWithHeaderParser libraryParamsParser = new TabbedTextFileWithHeaderParser(MULTIPLEX_PARAMS);

    // The finally block closes the parser on every exit path, including the validation failures below.
    try {
        final Set<String> expectedColumnLabels = CollectionUtil.makeSet("OUTPUT_PREFIX");
        final List<String> sampleBarcodeColumnLabels = new ArrayList<>();
        for (int i = 1; i <= readStructure.sampleBarcodes.length(); i++) {
            sampleBarcodeColumnLabels.add("BARCODE_" + i);
        }

        expectedColumnLabels.addAll(sampleBarcodeColumnLabels);
        assertExpectedColumns(libraryParamsParser.columnLabels(), expectedColumnLabels);

        for (final TabbedTextFileWithHeaderParser.Row row : libraryParamsParser) {
            List<String> sampleBarcodeValues = null;

            if (!sampleBarcodeColumnLabels.isEmpty()) {
                sampleBarcodeValues = new ArrayList<>();
                for (final String sampleBarcodeLabel : sampleBarcodeColumnLabels) {
                    sampleBarcodeValues.add(row.getField(sampleBarcodeLabel));
                }
            }

            final String key = (sampleBarcodeValues == null || sampleBarcodeValues.contains("N")) ? null : StringUtil.join("", sampleBarcodeValues);
            if (sampleBarcodeFastqWriterMap.containsKey(key)) {    //This will catch the case of having more than 1 line in a non-barcoded MULTIPLEX_PARAMS file
                throw new PicardException("Row for barcode " + key + " appears more than once in MULTIPLEX_PARAMS file " +
                        MULTIPLEX_PARAMS);
            }

            final FastqRecordsWriter writer = buildWriter(new File(row.getField("OUTPUT_PREFIX")));
            sampleBarcodeFastqWriterMap.put(key, writer);
        }
        if (sampleBarcodeFastqWriterMap.isEmpty()) {
            throw new PicardException("MULTIPLEX_PARAMS file " + MULTIPLEX_PARAMS + " does not have any data rows.");
        }
    } finally {
        libraryParamsParser.close();
    }
}
 
Example #22
Source File: PerTileParser.java    From picard with MIT License 5 votes vote down vote up
/**
 * Creates a parser over the given tile-to-file map, positioned so that nextTile is read first.
 *
 * @param tilesToFiles map from tile number to the file(s) for that tile
 * @param nextTile     the first tile to read; must be a key of tilesToFiles
 * @throws IllegalArgumentException if nextTile is not a key of tilesToFiles
 */
public PerTileParser(final IlluminaFileMap tilesToFiles, final int nextTile) {
    this.tileToFiles = tilesToFiles;
    this.currentTile = null;
    this.nextTile = nextTile;

    if(!tilesToFiles.containsKey(nextTile)) {
        // The trailing ")" balances the parenthesis opened before the key list.
        throw new IllegalArgumentException("NextTile (" + nextTile + ") is not contained by tilesToFiles (" + StringUtil.join(",", new ArrayList<Integer>(tilesToFiles.keySet())) + ")");
    }
}
 
Example #23
Source File: PerTileParser.java    From picard with MIT License 5 votes vote down vote up
/**
 * Makes the given tile the next one to be read and discards the current iterator.
 *
 * @param oneBasedTileNumber the tile to seek to; must be a key of tileToFiles
 * @throws PicardException if the tile is not a key of tileToFiles
 */
public void seekToTile(int oneBasedTileNumber) {
    nextTile = oneBasedTileNumber;

    final boolean knownTile = tileToFiles.containsKey(oneBasedTileNumber);
    if (!knownTile) {
        final String availableKeys = StringUtil.join(",", new ArrayList<Integer>(tileToFiles.keySet()));
        throw new PicardException("PerTileParser does not contain key(" + oneBasedTileNumber +") keys available (" + availableKeys + ")");
    }

    // Close any open iterator before dropping the reference.
    if (currentIterator != null) {
        currentIterator.close();
    }
    currentIterator = null;
}
 
Example #24
Source File: ClusterIntensityFileReader.java    From picard with MIT License 5 votes vote down vote up
/**
 * Parses a cluster intensity file header from its raw little-endian bytes.
 *
 * Fields are read in order: identifier, file version (byte), element size (byte),
 * first cycle (unsigned short), number of cycles (unsigned short), number of clusters (int).
 *
 * @param headerBytes the raw header; must hold at least HEADER_SIZE bytes
 * @param file        the file the header came from, used only in error messages
 * @throws PicardException if the header is too short or any field has an unexpected value
 */
public ClusterIntensityFileHeader(final byte[] headerBytes, final File file) {
    if(headerBytes.length < HEADER_SIZE) {
        throw new PicardException("Bytes passed to header constructor are too short: expected (" + HEADER_SIZE + ") received (" + headerBytes.length + ")");
    }

    // Wrap rather than copy: the buffer is only read, starting at position 0.
    final ByteBuffer buf = ByteBuffer.wrap(headerBytes);
    buf.order(ByteOrder.LITTLE_ENDIAN);

    final byte[] identifierBuf = new byte[IDENTIFIER.length];
    buf.get(identifierBuf);
    if (!Arrays.equals(identifierBuf, IDENTIFIER)) {
        throw new PicardException("Cluster intensity file " + file + " contains unexpected header: " +
                StringUtil.bytesToString(identifierBuf));
    }
    final byte fileVersion = buf.get();
    if (fileVersion != FILE_VERSION) {
        throw new PicardException("Cluster intensity file " + file + " contains unexpected version: " + fileVersion);
    }
    elementSize = buf.get();
    if (elementSize < 1 || elementSize > 2) {
        throw new PicardException("Cluster intensity file " + file + " contains unexpected element size: " + elementSize);
    }
    // convert these to unsigned
    firstCycle = UnsignedTypeUtil.uShortToInt(buf.getShort());
    numCycles = UnsignedTypeUtil.uShortToInt(buf.getShort());
    if (numCycles == 0) {
        throw new PicardException("Cluster intensity file " + file + " has zero cycles.");
    }
    numClusters = buf.getInt();
    if (numClusters < 0) {
        // Zero clusters is legitimate (a tile may have none), so only negative counts are rejected.
        throw new PicardException("Cluster intensity file " + file + " has negative number of clusters: " +numClusters);
    }
}
 
Example #25
Source File: IlluminaUtil.java    From picard with MIT License 5 votes vote down vote up
/**
 * Converts each barcode to a string and combines the strings via stringSeqsToString
 * using the supplied delimiter.
 *
 * @param barcodes the barcode sequences, one byte array per barcode
 * @param delim    the delimiter handed to stringSeqsToString
 * @return A single string representation of all the barcodes
 */
public static String byteArrayToString(final byte[][] barcodes, String delim) {
    final int count = barcodes.length;
    final String[] asStrings = new String[count];
    int idx = 0;
    for (final byte[] barcode : barcodes) {
        asStrings[idx++] = StringUtil.bytesToString(barcode);
    }
    return stringSeqsToString(asStrings, delim);
}
 
Example #26
Source File: IlluminaUtil.java    From picard with MIT License 5 votes vote down vote up
private IlluminaAdapterPair(final String fivePrime, final String threePrime) {
    this.threePrime = threePrime;
    this.threePrimeBytes = StringUtil.stringToBytes(threePrime);

    this.fivePrime = fivePrime;
    this.fivePrimeReadOrder = SequenceUtil.reverseComplement(fivePrime);
    this.fivePrimeBytes = StringUtil.stringToBytes(fivePrime);
    this.fivePrimeReadOrderBytes = StringUtil.stringToBytes(fivePrimeReadOrder);
}
 
Example #27
Source File: Snp.java    From picard with MIT License 5 votes vote down vote up
/**
 * Creates a SNP description.
 *
 * @param name              the SNP name
 * @param chrom             the chromosome
 * @param pos               the position
 * @param allele1           first allele; stored upper-cased
 * @param allele2           second allele; stored upper-cased
 * @param maf               stored as given in the maf field
 * @param fingerprintPanels the panels listing this SNP; null is replaced by an empty list
 */
public Snp(final String name, final String chrom, final int pos, final byte allele1, final byte allele2,
           final double maf, final List<String> fingerprintPanels) {
    this.name = name;
    this.chrom = chrom;
    this.pos = pos;
    this.maf = maf;
    this.allele1 = StringUtil.toUpperCase(allele1);
    this.allele2 = StringUtil.toUpperCase(allele2);

    if (fingerprintPanels == null) {
        this.fingerprintPanels = new ArrayList<String>();
    } else {
        this.fingerprintPanels = fingerprintPanels;
    }

    // Pre-build the three genotypes (1/1, 1/2, 2/2) from the alleles as passed in.
    this.genotypes[0] = DiploidGenotype.fromBases(allele1, allele1);
    this.genotypes[1] = DiploidGenotype.fromBases(allele1, allele2);
    this.genotypes[2] = DiploidGenotype.fromBases(allele2, allele2);
}
 
Example #28
Source File: IlluminaBasecallsToFastq.java    From picard with MIT License 5 votes vote down vote up
/**
 * Verifies that every required column is present.
 *
 * @param actualCols   The columns present in the MULTIPLEX_PARAMS file
 * @param expectedCols The columns that are REQUIRED
 * @throws PicardException if any expected column is absent from actualCols
 */
private void assertExpectedColumns(final Set<String> actualCols, final Set<String> expectedCols) {
    final Set<String> absent = new HashSet<>(expectedCols);
    absent.removeAll(actualCols);
    if (absent.isEmpty()) {
        return;
    }

    final String message = String.format(
            "MULTIPLEX_PARAMS file %s is missing the following columns: %s.",
            MULTIPLEX_PARAMS.getAbsolutePath(),
            StringUtil.join(", ", absent));
    throw new PicardException(message);
}
 
Example #29
Source File: ExtractIlluminaBarcodes.java    From picard with MIT License 5 votes vote down vote up
public BarcodeMetric(final String barcodeName, final String libraryName,
                     final String barcodeDisplay, final String[] barcodeSeqs) {

    this.BARCODE = barcodeDisplay;
    this.BARCODE_WITHOUT_DELIMITER = barcodeDisplay.replaceAll(IlluminaUtil.BARCODE_DELIMITER, "");
    this.BARCODE_NAME = barcodeName;
    this.LIBRARY_NAME = libraryName;
    this.barcodeBytes = new byte[barcodeSeqs.length][];
    for (int i = 0; i < barcodeSeqs.length; i++) {
        barcodeBytes[i] = htsjdk.samtools.util.StringUtil.stringToBytes(barcodeSeqs[i]);
    }
}
 
Example #30
Source File: ValidateReference.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 * Scans every sequence of the reference file and records a warning in messages.baseErrors
 * for a sequence containing a base that is not flagged in IUPAC_TABLE. Scanning of a sequence
 * stops at its first invalid base; a later offending sequence overwrites the earlier warning.
 *
 * @param referenceFile the reference file to validate
 */
private void validateReferenceBases(File referenceFile) {
    final ReferenceSequenceFile refSeqFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceFile, true);
    ReferenceSequence sequence;
    while ((sequence = refSeqFile.nextSequence()) != null) {
        for (final byte base: sequence.getBases()) {
            // Bytes outside the table (negative, or beyond its end) are invalid; checking the
            // range first avoids indexing the table out of bounds.
            final boolean valid = base >= 0 && base < IUPAC_TABLE.length && IUPAC_TABLE[base];
            if (!valid) {
                messages.baseErrors = String.format("WARNING: AT least one invalid base '%c' (decimal %d) in reference sequence named %s",
                        StringUtil.byteToChar(base), base, sequence.getName());
                break;
            }
        }
    }
}