Java Code Examples for htsjdk.samtools.util.StringUtil

The following examples show how to use htsjdk.samtools.util.StringUtil. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: picard   Source File: Test.java    License: MIT License 6 votes vote down vote up
public void run() {
    final int ITERATIONS = 1000000;
    final String[] fields = new String[10000];
    final StopWatch watch = new StopWatch();

    watch.start();
    for (int i=0; i<ITERATIONS; ++i) {
        if (StringUtil.split(TEXT, fields, '\t') > 100) {
            System.out.println("Mama Mia that's a lot of tokens!!");
        }
    }
    watch.stop();
    System.out.println("StringUtil.split() took " + watch.getElapsedTime());
    watch.reset();

    watch.start();
    for (int i=0; i<ITERATIONS; ++i) {
        if (split(TEXT, fields, "\t") > 100) {
            System.out.println("Mama Mia that's a lot of tokens!!");
        }
    }
    watch.stop();
    System.out.println("StringTokenizer took " + watch.getElapsedTime());
}
 
Example 2
Source Project: Drop-seq   Source File: CollapseTagWithContext.java    License: MIT License 6 votes vote down vote up
private void writeMetrics (final boolean writeEditDistanceDistribution, final String context, final AdaptiveMappingResult r, final PrintStream out) {
	if (out==null) return;
	List<EditDistanceMappingMetric> metricList= r.getMetricResult();

	for (EditDistanceMappingMetric edmm: metricList) {
		edmm.getOriginalObservations();
		// Steve reports the number of barcodes including the one that everything is merged into.
		List<String> line = new ArrayList<>(Arrays.asList(context, edmm.getBarcode(), Integer.toString(edmm.getNumMergedBarcodes()+1), Integer.toString(edmm.getEditDistanceDiscovered()), Integer.toString(edmm.getEditDistanceUsed()),
				Integer.toString(edmm.getOriginalObservations()), Integer.toString(edmm.getTotalObservations())));

		if (writeEditDistanceDistribution) {
			int [] edList = edmm.getEdList();
			if (edList.length>0) {
				Integer[] x = Arrays.stream( edList ).boxed().toArray( Integer[]::new );
				String edFormatted = StringUtil.join(",", x);
				line.add(edFormatted);
			} else
				line.add("NA");

		}
		out.println(StringUtil.join("\t", line));

	}
}
 
Example 3
Source Project: Drop-seq   Source File: TrimSequenceTemplate.java    License: MIT License 6 votes vote down vote up
/**
 * Test to see if this read matches this barcode. If any base of a barcode
 * starts with N or n, then ignore that position.
 *
 * @param testString
 *            The read to look for this barcode in. The barcode should be at
 *            the start of the read for this method.  The entire barcode is expected for a match.
 * @return true if this barcode is found in the read.
 */
public boolean hasForwardMatch(final String testString) {
	byte[] testBases = StringUtil.stringToBytes(testString);
	int numBasesCanMatch = 0;
	int numBasesMatch = 0;
	for (int i = 0; i < bases.length; i++) {
		if (isIgnoreBase(this.bases[i]))
			continue;
		numBasesCanMatch++;
		if (SequenceUtil.basesEqual(testBases[i], bases[i]))
			numBasesMatch++;
	}
	if (numBasesCanMatch == numBasesMatch)
		return (true);
	return false;
}
 
Example 4
Source Project: Drop-seq   Source File: SNPUMIBasePileupTest.java    License: MIT License 6 votes vote down vote up
@Test(enabled=true)
/**
 * Add an uneven number of bases and quals to trip the exception throw.
 */
public void testAddBaseQualsError () {
	int snpPos=76227022;
	Interval snpInterval = new Interval("HUMAN_1", snpPos, snpPos, true, "test");
	SNPUMIBasePileup p = new SNPUMIBasePileup(snpInterval, "ACADM", "fake_cell", "AAAAA");
	char [] bases = {'A', 'A'};
	byte [] quals = {27,17,55};
	byte [] bases2 = new byte [bases.length];
	StringUtil.charsToBytes(bases, 0, bases.length, bases2, 0);
	boolean passes=false;
	try {
		p.setBasesAndQualities(bases2, quals);
	} catch (IllegalArgumentException e) {
		Assert.assertNotNull(e);
		passes=true;
	}
	Assert.assertTrue(passes);
}
 
Example 5
Source Project: Drop-seq   Source File: SNPUMIBasePileupTest.java    License: MIT License 6 votes vote down vote up
@Test(enabled=true)
public void testAddBaseQuals () {
	int snpPos=76227022;
	Interval snpInterval = new Interval("HUMAN_1", snpPos, snpPos, true, "test");
	SNPUMIBasePileup p = new SNPUMIBasePileup(snpInterval, "ACADM", "fake_cell", "AAAAA");
	char [] bases = {'A', 'A'};
	byte [] quals = {27,55};
	byte [] bases2 = new byte [bases.length];
	StringUtil.charsToBytes(bases, 0, bases.length, bases2, 0);
	boolean passes=true;
	try {
		p.setBasesAndQualities(bases2, quals);
	} catch (IllegalArgumentException e) {
		passes=false;
	}
	Assert.assertTrue(passes);
}
 
Example 6
Source Project: Drop-seq   Source File: LikelihoodUtilsTest.java    License: MIT License 6 votes vote down vote up
@Test(enabled=true)
public void testMixedLikelihoodMultiRead () {
	GenotypeType [] g = {GenotypeType.HOM_REF, GenotypeType.HET, GenotypeType.HOM_VAR};
	List<GenotypeType> genotypes  = Arrays.asList(g);

	Double [] m = {new Double(2), new Double(1), new Double(1)};
	List<Double> mixture  = Arrays.asList(m);

	char refAllele ='A';
	char altAllele ='T';

	Byte [] b = {StringUtil.charToByte('A'), StringUtil.charToByte('A')};
	List<Byte> bases = Arrays.asList(b);
	Byte [] q = {new Byte ((byte)10), new Byte ((byte)10)};
	List<Byte> qualities =Arrays.asList(q);

	double result = LikelihoodUtils.getInstance().getLogLikelihoodMixedModel(refAllele, altAllele, genotypes, mixture, bases, qualities, null, null, null);
	Assert.assertEquals(result, Math.log10(0.36), 0.001);

}
 
Example 7
Source Project: Drop-seq   Source File: CollapseTagWithContextTest.java    License: MIT License 6 votes vote down vote up
private final String alterBaseString(final String baseString, final int numChanges) {
     final byte[] bases = StringUtil.stringToBytes(baseString);
     if (numChanges > baseString.length())
throw new IllegalArgumentException("Too many changes requested");
     final Set<Integer> mutatedPositions = new HashSet<>();
     int changesSoFar = 0;
     while (changesSoFar < numChanges) {
         int positionToChange = random.nextInt(bases.length);
         while (mutatedPositions.contains(positionToChange))
	positionToChange = random.nextInt(bases.length);
         mutatedPositions.add(positionToChange);
         bases[positionToChange] = alterBase(bases[positionToChange]);
         ++changesSoFar;
     }
     return StringUtil.bytesToString(bases);
 }
 
Example 8
Source Project: picard   Source File: CheckIlluminaDirectory.java    License: MIT License 6 votes vote down vote up
@Override
protected String[] customCommandLineValidation() {
    IOUtil.assertDirectoryIsReadable(BASECALLS_DIR);
    final List<String> errors = new ArrayList<>();

    for (final Integer lane : LANES) {
        if (lane < 1) {
            errors.add(
                    "LANES must be greater than or equal to 1.  LANES passed in " + StringUtil.join(", ", LANES));
            break;
        }
    }

    if (errors.isEmpty()) {
        return null;
    } else {
        return errors.toArray(new String[errors.size()]);
    }
}
 
Example 9
/**
 * Create one SAMSequenceRecord from a single fasta sequence
 */
private SAMSequenceRecord makeSequenceRecord(final ReferenceSequence refSeq) {
  final SAMSequenceRecord ret = new SAMSequenceRecord(refSeq.getName(), refSeq.length());

  // Compute MD5 of upcased bases
  final byte[] bases = refSeq.getBases();
  for (int i = 0; i < bases.length; ++i) {
    bases[i] = StringUtil.toUpperCase(bases[i]);
  }

  ret.setAttribute(SAMSequenceRecord.MD5_TAG, md5Hash(bases));
  if (GENOME_ASSEMBLY != null) {
    ret.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, GENOME_ASSEMBLY);
  }
  ret.setAttribute(SAMSequenceRecord.URI_TAG, URI);
  if (SPECIES != null) {
    ret.setAttribute(SAMSequenceRecord.SPECIES_TAG, SPECIES);
  }
  return ret;
}
 
Example 10
Source Project: picard   Source File: CollectRnaSeqMetricsTest.java    License: MIT License 6 votes vote down vote up
public File getRefFlatFile(String sequence) throws Exception {
    // Create a refFlat file with a single gene containing two exons, one of which is overlapped by the
    // ribosomal interval.
    final String[] refFlatFields = new String[RefFlatColumns.values().length];
    refFlatFields[RefFlatColumns.GENE_NAME.ordinal()] = "myGene";
    refFlatFields[RefFlatColumns.TRANSCRIPT_NAME.ordinal()] = "myTranscript";
    refFlatFields[RefFlatColumns.CHROMOSOME.ordinal()] = sequence;
    refFlatFields[RefFlatColumns.STRAND.ordinal()] = "+";
    refFlatFields[RefFlatColumns.TX_START.ordinal()] = "49";
    refFlatFields[RefFlatColumns.TX_END.ordinal()] = "500";
    refFlatFields[RefFlatColumns.CDS_START.ordinal()] = "74";
    refFlatFields[RefFlatColumns.CDS_END.ordinal()] = "400";
    refFlatFields[RefFlatColumns.EXON_COUNT.ordinal()] = "2";
    refFlatFields[RefFlatColumns.EXON_STARTS.ordinal()] = "49,249";
    refFlatFields[RefFlatColumns.EXON_ENDS.ordinal()] = "200,500";

    final File refFlatFile = File.createTempFile("tmp.", ".refFlat");
    refFlatFile.deleteOnExit();
    final PrintStream refFlatStream = new PrintStream(refFlatFile);
    refFlatStream.println(StringUtil.join("\t", refFlatFields));
    refFlatStream.close();

    return refFlatFile;
}
 
Example 11
Source Project: picard   Source File: IlluminaDataProviderFactory.java    License: MIT License 6 votes vote down vote up
/**
 * Call this method to create a ClusterData iterator over the specified tiles.
 *
 * @return An iterator for reading the Illumina basecall output for the lane specified in the constructor.
 */
public BaseIlluminaDataProvider makeDataProvider(List<Integer> requestedTiles) {
    if (requestedTiles == null) {
        requestedTiles = availableTiles;
    } else {
        if (requestedTiles.isEmpty()) {
            throw new PicardException("Zero length tile list supplied to makeDataProvider, you must specify at least 1 tile OR pass NULL to use all available tiles");
        }
    }

    final Map<IlluminaParser, Set<IlluminaDataType>> parsersToDataType = new HashMap<>();
    for (final Map.Entry<SupportedIlluminaFormat, Set<IlluminaDataType>> fmToDt : formatToDataTypes.entrySet()) {
        parsersToDataType.put(makeParser(fmToDt.getKey(), requestedTiles), fmToDt.getValue());
    }

    log.debug("The following parsers will be used by IlluminaDataProvider: " + StringUtil.join("," + parsersToDataType.keySet()));

    return new IlluminaDataProvider(outputMapping, parsersToDataType, basecallDirectory, lane);
}
 
Example 12
Source Project: picard   Source File: IlluminaBasecallsToSam.java    License: MIT License 6 votes vote down vote up
/**
 * Assert that expectedCols are present and return actualCols - expectedCols
 *
 * @param actualCols   The columns present in the LIBRARY_PARAMS file
 * @param expectedCols The columns that are REQUIRED
 * @return actualCols - expectedCols
 */
private Set<String> findAndFilterExpectedColumns(final Set<String> actualCols, final Set<String> expectedCols) {
    final Set<String> missingColumns = new HashSet<>(expectedCols);
    missingColumns.removeAll(actualCols);

    if (!missingColumns.isEmpty()) {
        throw new PicardException(String.format(
                "LIBRARY_PARAMS file %s is missing the following columns: %s.",
                LIBRARY_PARAMS.getAbsolutePath(), StringUtil.join(", ", missingColumns
                )));
    }

    final Set<String> remainingColumns = new HashSet<>(actualCols);
    remainingColumns.removeAll(expectedCols);
    return remainingColumns;
}
 
Example 13
Source Project: picard   Source File: IlluminaBasecallsToSam.java    License: MIT License 6 votes vote down vote up
/**
 * Given a set of columns assert that all columns conform to the format of an RG header attribute (i.e. 2 letters)
 * the attribute is NOT a member of the rgHeaderTags that are built by default in buildSamHeaderParameters
 *
 * @param rgTagColumns A set of columns that should conform to the rg header attribute format
 */
private void checkRgTagColumns(final Set<String> rgTagColumns) {
    final Set<String> forbiddenHeaders = buildSamHeaderParameters(null).keySet();
    forbiddenHeaders.retainAll(rgTagColumns);

    if (!forbiddenHeaders.isEmpty()) {
        throw new PicardException("Illegal ReadGroup tags in library params(barcode params) file(" + LIBRARY_PARAMS.getAbsolutePath() + ") Offending headers = " + StringUtil.join(", ", forbiddenHeaders));
    }

    for (final String column : rgTagColumns) {
        if (column.length() > 2) {
            throw new PicardException("Column label (" + column + ") unrecognized.  Library params(barcode params) can only contain the columns " +
                    "(OUTPUT, LIBRARY_NAME, SAMPLE_ALIAS, BARCODE, BARCODE_<X> where X is a positive integer) OR two letter RG tags!");
        }
    }
}
 
Example 14
Source Project: picard   Source File: FastqToSam.java    License: MIT License 6 votes vote down vote up
private SAMRecord createSamRecord(final SAMFileHeader header, final String baseName, final FastqRecord frec, final boolean paired) {
    final SAMRecord srec = new SAMRecord(header);
    srec.setReadName(baseName);
    srec.setReadString(frec.getReadString());
    srec.setReadUnmappedFlag(true);
    srec.setAttribute(ReservedTagConstants.READ_GROUP_ID, READ_GROUP_NAME);
    final byte[] quals = StringUtil.stringToBytes(frec.getBaseQualityString());
    convertQuality(quals, QUALITY_FORMAT);
    for (final byte qual : quals) {
        final int uQual = qual & 0xff;
        if (uQual < MIN_Q || uQual > MAX_Q) {
            throw new PicardException("Base quality " + uQual + " is not in the range " + MIN_Q + ".." +
            MAX_Q + " for read " + frec.getReadHeader());
        }
    }
    srec.setBaseQualities(quals);

    if (paired) {
        srec.setReadPairedFlag(true);
        srec.setMateUnmappedFlag(true);
    }
    return srec ;
}
 
Example 15
Source Project: picard   Source File: FastqToSam.java    License: MIT License 6 votes vote down vote up
/** Returns read baseName and asserts correct pair read name format:
 * <ul>
 * <li> Paired reads must either have the exact same read names or they must contain at least one "/"
 * <li> and the First pair read name must end with "/1" and second pair read name ends with "/2"
 * <li> The baseName (read name part before the /) must be the same for both read names
 * <li> If the read names are exactly the same but end in "/2" or "/1" then an exception will be thrown
 * </ul>
 */
String getBaseName(final String readName1, final String readName2, final FastqReader freader1, final FastqReader freader2) {
    String [] toks = getReadNameTokens(readName1, 1, freader1);
    final String baseName1 = toks[0] ;
    final String num1 = toks[1] ;

    toks = getReadNameTokens(readName2, 2, freader2);
    final String baseName2 = toks[0] ;
    final String num2 = toks[1];

    if (!baseName1.equals(baseName2)) {
        throw new PicardException(String.format("In paired mode, read name 1 (%s) does not match read name 2 (%s)", baseName1,baseName2));
    }

    final boolean num1Blank = StringUtil.isBlank(num1);
    final boolean num2Blank = StringUtil.isBlank(num2);
    if (num1Blank || num2Blank) {
        if(!num1Blank) throw new PicardException(error(freader1,"Pair 1 number is missing (" +readName1+ "). Both pair numbers must be present or neither."));       //num1 != blank and num2   == blank
        else if(!num2Blank) throw new PicardException(error(freader2, "Pair 2 number is missing (" +readName2+ "). Both pair numbers must be present or neither.")); //num1 == blank and num =2 != blank
    } else {
        if (!num1.equals("1")) throw new PicardException(error(freader1,"Pair 1 number must be 1 ("+readName1+")"));
        if (!num2.equals("2")) throw new PicardException(error(freader2,"Pair 2 number must be 2 ("+readName2+")"));
    }

    return baseName1 ;
}
 
Example 16
private void saveResults(final MetricsFile<?, Integer> metrics, final SAMFileHeader readsHeader, final String inputFileName){
    MetricsUtils.saveMetrics(metrics, out);

    if (metrics.getAllHistograms().isEmpty()) {
        logger.warn("No valid bases found in input file.");
    } else if (chartOutput != null){
        // Now run R to generate a chart

        // If we're working with a single library, assign that library's name
        // as a suffix to the plot title
        final List<SAMReadGroupRecord> readGroups = readsHeader.getReadGroups();

        /*
         * A subtitle for the plot, usually corresponding to a library.
         */
        String plotSubtitle = "";
        if (readGroups.size() == 1) {
            plotSubtitle = StringUtil.asEmptyIfNull(readGroups.get(0).getLibrary());
        }
        final RScriptExecutor executor = new RScriptExecutor();
        executor.addScript(getMeanQualityByCycleRScriptResource());
        executor.addArgs(out, chartOutput.getAbsolutePath(), inputFileName, plotSubtitle);
        executor.exec();
    }
}
 
Example 17
Source Project: picard   Source File: ContextAccumulator.java    License: MIT License 6 votes vote down vote up
/**
 * Fills a halfContextAccumulator by summing over the appropriate counts from a fullContextAccumulator.
 */
public void fillHalfRecords(final ContextAccumulator fullContextAccumulator, final int contextSize) {
    final String padding = StringUtil.repeatCharNTimes('N', contextSize);

    for (Map.Entry<String,AlignmentAccumulator[]> fullContext : fullContextAccumulator.artifactMap.entrySet()) {
        final String fullContextKey = fullContext.getKey();
        final char centralBase = fullContextKey.charAt(contextSize);
        final String leadingContextKey = fullContextKey.substring(0, contextSize) + centralBase + padding;
        final String trailingContextKey = padding + centralBase + fullContextKey.substring(contextSize + 1, fullContextKey.length());

        final AlignmentAccumulator[] trailingAlignmentAccumulators = this.artifactMap.get(trailingContextKey);
        final AlignmentAccumulator[] leadingAlignmentAccumulators = this.artifactMap.get(leadingContextKey);
        final AlignmentAccumulator[] fullAlignmentAccumulators = fullContext.getValue();

        for (int i=0; i < fullAlignmentAccumulators.length; i++) {
            trailingAlignmentAccumulators[i].merge(fullAlignmentAccumulators[i]);
            leadingAlignmentAccumulators[i].merge(fullAlignmentAccumulators[i]);
        }
    }
}
 
Example 18
Source Project: picard   Source File: ContextAccumulator.java    License: MIT License 6 votes vote down vote up
/**
 * Fills a zeroContextAccumulator by summing over the appropriate counts from a fullContextAccumulator.
 */
public void fillZeroRecords(final ContextAccumulator fullContextAccumulator, final int contextSize) {
    final String padding = StringUtil.repeatCharNTimes('N', contextSize);

    for (Map.Entry<String,AlignmentAccumulator[]> fullContext : fullContextAccumulator.artifactMap.entrySet()) {
        final String fullContextKey = fullContext.getKey();
        final char centralBase = fullContextKey.charAt(contextSize);
        final String zeroContextKey = padding + centralBase + padding;

        final AlignmentAccumulator[] zeroAlignmentAccumulators = this.artifactMap.get(zeroContextKey);
        final AlignmentAccumulator[] fullAlignmentAccumulators = fullContext.getValue();

        for (int i=0; i < fullAlignmentAccumulators.length; i++) {
            zeroAlignmentAccumulators[i].merge(fullAlignmentAccumulators[i]);
        }
    }
}
 
Example 19
Source Project: picard   Source File: IlluminaBasecallsToFastqTest.java    License: MIT License 6 votes vote down vote up
private void convertParamsFile(String libraryParamsFile, int concatNColumnFields, File testDataDir, File outputDir, File libraryParams, List<File> outputPrefixes) throws FileNotFoundException {
    try (LineReader reader = new BufferedLineReader(new FileInputStream(new File(testDataDir, libraryParamsFile)))) {
        final PrintWriter writer = new PrintWriter(libraryParams);
        final String header = reader.readLine();
        writer.println(header + "\tOUTPUT_PREFIX");
        while (true) {
            final String line = reader.readLine();
            if (line == null) {
                break;
            }
            final String[] fields = line.split("\t");
            final File outputPrefix = new File(outputDir, StringUtil.join("", Arrays.copyOfRange(fields, 0, concatNColumnFields)));
            outputPrefixes.add(outputPrefix);
            writer.println(line + "\t" + outputPrefix);
        }
        writer.close();
    }
}
 
Example 20
Source Project: Drop-seq   Source File: ValidateReference.java    License: MIT License 5 votes vote down vote up
private void validateReferenceBases(File referenceFile) {
    final ReferenceSequenceFile refSeqFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceFile, true);
    ReferenceSequence sequence;
    while ((sequence = refSeqFile.nextSequence()) != null) {
        for (final byte base: sequence.getBases()) {
            if (!IUPAC_TABLE[base]) {
                messages.baseErrors = String.format("WARNING: AT least one invalid base '%c' (decimal %d) in reference sequence named %s",
                        StringUtil.byteToChar(base), base, sequence.getName());
                break;
            }
        }
    }
}
 
Example 21
Source Project: Drop-seq   Source File: SNPBasePileUp.java    License: MIT License 5 votes vote down vote up
public int getCountBase (final char base) {
	Byte baseB = StringUtil.charToByte(base);
	int count=0;
	for (Byte b: bases)
		if (b.equals(baseB)) count++;
	return count;
}
 
Example 22
private static Tuple2<TestDataBreakEndVariants, TestDataBreakEndVariants> forInterChromosomeStrandSwitch55() {

        String contigName = "forInterChromosomeStrandSwitch55";
        byte[] contigSequence = "ATTCTGAGAAACTTCATTTTGATGTGTGCATTCATCTTCCAGAGTTGAAACTTTCTTTTGATTGTGTAGTTTTGAAACACTCTTTTTGTAGAATCTGCAAGGGGGTATTTGTAGGGATTTGAAGCCTATTGTTGAAAAGGTAATATCTTCACATAAAAACTACATAGGATCATTCGGAGAAACTTCTTTGTGATGTGTGCATTCAACTCACAGAGTTGAACCTATCTTTTTTTTTTTAATGTCTGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGGAGCAATTTGAGGCCTAAGGTGGAAAAGGAAATATTTTCACATAAAAACTAGACAGAAGAATTCTGTGAAACTTGTTCAGGACCTGTGCATTCATCTTACAGATTTGAATCTTTCTTTTGATTGAGCAGTTTGGAAACACTGTTTTTGTAGAATCTTCAGGTGGACATTCAGAGCACTTTGTGTCCTATGGTAGAAAAGGAAATATCTTCATA".getBytes();
        String homology = "";
        String insSeq = StringUtil.bytesToString(getReverseComplimentCopy(Arrays.copyOfRange(contigSequence, 230, 292)));
        AlignmentInterval firstAlignment = new AlignmentInterval(new SimpleInterval("chr21:10784600-10784829"), 1, 230, TextCigarCodec.decode("230M279S"), true, 60, 12, 170, ContigAlignmentsModifier.AlnModType.NONE);
        AlignmentInterval secondAlignment = new AlignmentInterval(new SimpleInterval("chr20:28817762-28817977"), 293, 509, TextCigarCodec.decode("292H93M1I123M"), false, 60, 11, 149, ContigAlignmentsModifier.AlnModType.NONE);
        SimpleChimera simpleChimera = new SimpleChimera(contigName, firstAlignment, secondAlignment, StrandSwitch.FORWARD_TO_REVERSE, false, Collections.emptyList(), NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME);
        SimpleInterval expectedLeftBreakpoint = new SimpleInterval("chr20:28817977-28817977");
        SimpleInterval expectedRightBreakpoint = new SimpleInterval("chr21:10784829-10784829");
        final BreakpointComplications expectedBreakpointComplications = new BreakpointComplications.InterChromosomeBreakpointComplications(homology, insSeq);
        NovelAdjacencyAndAltHaplotype expectedNovelAdjacencyAndAltSeq = new NovelAdjacencyAndAltHaplotype(expectedLeftBreakpoint, expectedRightBreakpoint, StrandSwitch.FORWARD_TO_REVERSE, expectedBreakpointComplications, TypeInferredFromSimpleChimera.INTER_CHR_STRAND_SWITCH_55, EMPTY_BYTE_ARRAY);
        final List<SvType> expectedSVTypes = Arrays.asList(
                makeBNDType("chr20", 28817977, "BND_chr20_28817977_chr21_10784829_1", Allele.create("A", true), Allele.create("A"+insSeq+"]chr21:10784829]"), Collections.emptyMap(), true, BreakEndVariantType.SupportedType.INTER_CHR_STRAND_SWITCH_55),
                makeBNDType("chr21", 10784829, "BND_chr20_28817977_chr21_10784829_2", Allele.create("T", true), Allele.create("T"+ SequenceUtil.reverseComplement(insSeq) +"]chr20:28817977]"), Collections.emptyMap(), false, BreakEndVariantType.SupportedType.INTER_CHR_STRAND_SWITCH_55)
        );
        final List<VariantContext> expectedVariants = Arrays.asList(
                addStandardAttributes(makeBND(expectedLeftBreakpoint, expectedRightBreakpoint, Allele.create("A", true), insSeq, "", true, true, true), contigName, 60, 216, homology, insSeq, "BND_chr20_28817977_chr21_10784829_2").make(),
                addStandardAttributes(makeBND(expectedLeftBreakpoint, expectedRightBreakpoint, Allele.create("T", true), SequenceUtil.reverseComplement(insSeq), "", false, true, true), contigName, 60, 216, homology, insSeq, "BND_chr20_28817977_chr21_10784829_1").make()
        );

        final TestDataBreakEndVariants forInterChromosomeStrandSwitch55_plus =
                new TestDataBreakEndVariants(firstAlignment, secondAlignment, contigName, contigSequence, true, simpleChimera, expectedNovelAdjacencyAndAltSeq, expectedSVTypes, expectedVariants, BreakpointsInference.InterChromosomeBreakpointsInference.class);


        firstAlignment = new AlignmentInterval(new SimpleInterval("chr20:28817762-28817977"), 1, 217, TextCigarCodec.decode("123M1I93M292H"), true, 60, 11, 149, ContigAlignmentsModifier.AlnModType.NONE);
        secondAlignment = new AlignmentInterval(new SimpleInterval("chr21:10784600-10784829"), 280, 509, TextCigarCodec.decode("279S230M"), false, 60, 12, 170, ContigAlignmentsModifier.AlnModType.NONE);
        simpleChimera = new SimpleChimera(contigName, firstAlignment, secondAlignment, StrandSwitch.FORWARD_TO_REVERSE, true, Collections.emptyList(), NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME);

        final TestDataBreakEndVariants forInterChromosomeStrandSwitch55_minus =
                new TestDataBreakEndVariants(firstAlignment, secondAlignment, contigName, getReverseComplimentCopy(contigSequence), false, simpleChimera, expectedNovelAdjacencyAndAltSeq, expectedSVTypes, expectedVariants, BreakpointsInference.InterChromosomeBreakpointsInference.class);

        return new Tuple2<>(forInterChromosomeStrandSwitch55_plus, forInterChromosomeStrandSwitch55_minus);
    }
 
Example 23
Source Project: Drop-seq   Source File: ReferenceUtils.java    License: MIT License 5 votes vote down vote up
public static byte [] setSequenceToN (final byte [] fastaRefBases, final Interval interval) {
	byte [] result = fastaRefBases;
	int startBase=interval.getStart();
	int endBase=interval.getEnd();
	// the byte [] is base 0, the coordinates are base 1.
	Arrays.fill(result, startBase-1, endBase, StringUtil.charToByte('N'));
	return (result);
}
 
Example 24
Source Project: Drop-seq   Source File: CollapseTagWithContext.java    License: MIT License 5 votes vote down vote up
private SAMFileWriter getWriter (final SamReader reader) {
	SAMFileHeader header = reader.getFileHeader();
	SamHeaderUtil.addPgRecord(header, this);
	String context = StringUtil.join(" ", this.CONTEXT_TAGS);
	header.addComment("Edit distance collapsed tag " +  this.COLLAPSE_TAG + " to new tag " + this.OUT_TAG+ " with edit distance "+ this.EDIT_DISTANCE + "using indels=" + this.FIND_INDELS + " in the context of tags [" + context + "]");
       SAMFileWriter writer= new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, this.OUTPUT);
       return writer;
}
 
Example 25
Source Project: Drop-seq   Source File: DetectBeadSubstitutionErrors.java    License: MIT License 5 votes vote down vote up
private void writeReport (final BottomUpCollapseResult result, final BottomUpCollapseResult resultClean, final UMIsPerCellResult umiResult) {
	PrintStream outReport = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(this.OUTPUT_REPORT));

	// write comments section, each line starts with a "#"
	outReport.println("# FILTER_AMBIGUOUS="+FILTER_AMBIGUOUS);
	outReport.println("# MIN_UMIS_PER_CELL="+MIN_UMIS_PER_CELL);
	outReport.println("# UMI_BIAS_THRESHOLD="+MIN_UMIS_PER_CELL);
	outReport.println("# EDIT_DISTANCE="+MIN_UMIS_PER_CELL);
	outReport.println("#");
	outReport.println("# TOTAL_BARCODES_TESTED="+umiResult.getNumCellBarocodesTested());
	outReport.println("# BARCODES_COLLAPSED="+result.getUnambiguousSmallBarcodes().size());
	outReport.println("# ESTIMATED_UMIS_COLLAPSED="+getTotalAmbiguousUMIs(result.getUnambiguousSmallBarcodes(), umiResult.getUmisPerCell()));
	outReport.println("# AMBIGUOUS_BARCODES="+result.getAmbiguousBarcodes().size());
	outReport.println("# ESTIMATED_AMBIGUOUS_UMIS="+getTotalAmbiguousUMIs(result.getAmbiguousBarcodes(), umiResult.getUmisPerCell()));
	outReport.println("# POLY_T_BIASED_BARCODES="+umiResult.getPolyTBiasedBarcodes());
	outReport.println("# POLY_T_BIASED_BARRCODE_UMIS="+umiResult.getPolyTBiasedUMIs());
	outReport.println("# POLY_T_POSITION="+umiResult.getPolyTPosition());

	/// write header
	String [] header= {"intended_barcode", "neighbor_barcode", "intended_size", "neighbor_size", "position", "intended_base", "neighbor_base", "repaired"};
	outReport.println(StringUtil.join("\t", header));

	ObjectCounter<String> umiCounts=umiResult.getUmisPerCell();
	Iterator<String> smalls = result.getUnambiguousSmallBarcodes().iterator();
	while (smalls.hasNext()) {
		String small=smalls.next();
		String large = result.getLargerRelatedBarcode(small);
		BarcodeSubstitutionPair p = new BarcodeSubstitutionPair(large, small);
		String cleanLarger = resultClean.getLargerRelatedBarcode(small);
		boolean repaired = cleanLarger!=null;

		String [] body = {large, small, Integer.toString(umiCounts.getCountForKey(large)), Integer.toString(umiCounts.getCountForKey(small)),
				Integer.toString(p.getPosition()+1), p.getIntendedBase(), p.getNeighborBase(), Boolean.toString(repaired).toUpperCase()};
		outReport.println(StringUtil.join("\t", body));
	}

	CloserUtil.close(outReport);
}
 
Example 26
Source Project: Drop-seq   Source File: IntervalTagComparator.java    License: MIT License 5 votes vote down vote up
private String [] getFirstSplit(final SAMRecord rec) {

    	Object strIntervalRec = rec.getAttribute(tag);
    	if (!(strIntervalRec instanceof String))
			throw new IllegalArgumentException(SAMTagUtil.getSingleton().makeStringTag(this.tag) + " does not have a String value");
    	String intervalString = (String) strIntervalRec;
    	String [] result = new String [4];
    	StringUtil.splitConcatenateExcessTokens(intervalString, result, ENCODE_DELIMITER);
    	return (result);
    }
 
Example 27
Source Project: Drop-seq   Source File: IntervalTagComparator.java    License: MIT License 5 votes vote down vote up
private int [] parsePosition (final String posString) {
 	String [] posArray = new String [2];
 	StringUtil.split(posString, posArray, '-');
 	int start=Integer.parseInt(posArray[0]);
 	// set the end = the start, this changes if there's a second position in the field.
 	int end=Integer.parseInt(posArray[0]);
 	if (posArray[1]!=null)
end = Integer.parseInt(posArray[1]);

 	int [] result = {start,end};
 	return result;

 }
 
Example 28
Source Project: Drop-seq   Source File: MatrixMarketWriter.java    License: MIT License 5 votes vote down vote up
private void WriteDimensionLabels(final String dimensionName, final List<String> names) {
    try {
        for (int i = 0; i < names.size(); i += MatrixMarketConstants.NUM_HEADER_ELEMENTS_PER_ROW) {
            writer.write(MatrixMarketConstants.MM_STRUCTURED_COMMENT_LINE_START);
            writer.write(dimensionName);
            writer.write(MatrixMarketConstants.MM_HEADER_LIST_SEPARATOR);
            final int elementsToWrite = Math.min(MatrixMarketConstants.NUM_HEADER_ELEMENTS_PER_ROW, names.size() - i);
            writer.write(StringUtil.join(MatrixMarketConstants.MM_HEADER_LIST_SEPARATOR, names.subList(i, i + elementsToWrite)));
            writer.newLine();
        }
    } catch (IOException e) {
        throw new RuntimeIOException("Exception writing " + filename, e);
    }
}
 
Example 29
Source Project: Drop-seq   Source File: PolyAWithAdapterFinder.java    License: MIT License 5 votes vote down vote up
public PolyARun getPolyAStart(final String readString, final String adapterSequence) {
    final byte[] readBases = StringUtil.stringToBytes(readString);
    int adapterClipPosition = ClippingUtility.findIndexOfClipSequence(
            readBases,
            StringUtil.stringToBytes(adapterSequence),
            minAdapterMatch,
            maxAdapterErrorRate);
    if (adapterClipPosition == ClippingUtility.NO_MATCH) {
        adapterClipPosition = readString.length();
    } else if (adapterClipPosition == 0) {
        return new PolyARun(0, 0, 0);
    }
    final SimplePolyAFinder.PolyARun ret = getPolyARun(readString, adapterClipPosition);

    // If there was a short adapter match, but not enough poly A before it,
    // see if there would be enough poly A if the adapter considered not to match.
    if (ret.isNoMatch() && adapterClipPosition < readString.length() &&
            adapterClipPosition + dubiousAdapterMatchLength >= readString.length()) {
        // If did not find enough polyA looking before adapter, try again looking from end of read.
        final SimplePolyAFinder.PolyARun tryWithoutAdapter = getPolyARun(readString, readString.length());
        if (!tryWithoutAdapter.isNoMatch()) {
            return tryWithoutAdapter;
        }
    }
    return ret;

}
 
Example 30
Source Project: Drop-seq   Source File: TrimSequenceTemplate.java    License: MIT License 5 votes vote down vote up
public TrimSequenceTemplate(final String sequence, final String ignoredBases) {
	this.sequence = sequence;
	this.reverseComplement = SequenceUtil.reverseComplement(this.sequence);
	bases = StringUtil.stringToBytes(this.sequence);
	rcBases = StringUtil
			.stringToBytes(this.reverseComplement);
	this.ignoredBases = StringUtil
			.stringToBytes(ignoredBases);
}