htsjdk.samtools.util.CollectionUtil Java Examples

The following examples show how to use htsjdk.samtools.util.CollectionUtil. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: GeneFromGTFBuilder.java From Drop-seq with MIT License

6 votes

/**
 * Builds a gene from the given GTF records and adds one transcript to it for every
 * non-null transcript ID found among the records.
 *
 * @param gtfRecords the GTF records belonging to a single gene
 * @return the gene with its transcripts attached
 * @throws AnnotationException if the gene ends up with nothing to iterate over
 */
private GeneFromGTF makeGeneWithTranscriptsFromGTFRecords(final Collection<GTFRecord> gtfRecords) {
    final GeneFromGTF gene = makeGeneFromGTFRecords(gtfRecords);

    // Pass the records through GeneAnnotationFilter (intended to drop featureType==gene)
    // before grouping them into transcripts.
    final Collection<GTFRecord> filteredRecords =
            CollectionUtil.makeCollection(new GeneAnnotationFilter(gtfRecords.iterator()));

    final Map<String, List<GTFRecord>> recordsByTranscriptId = gatherByTranscriptId(filteredRecords);
    for (final Map.Entry<String, List<GTFRecord>> transcriptEntry : recordsByTranscriptId.entrySet()) {
        // A null key marks gene-level entries, which carry no transcript.
        if (transcriptEntry.getKey() != null) {
            addTranscriptToGeneFromGTFRecords(gene, transcriptEntry.getValue());
        }
    }

    if (gene.iterator().hasNext()) {
        return gene;
    }
    throw new AnnotationException("No transcript in GTF for gene " + gene.getName());
}

Example #2

Source File: HaplotypeProbabilitiesTest.java From picard with MIT License

6 votes

/**
 * Feeds two sets of log-likelihoods for the same SNP into two separate probability objects,
 * checks each posterior against the normalized likelihood-times-frequency vector, and then
 * checks that the scaled evidence probability is the same in both directions.
 */
@Test(dataProvider = "symmetricLODdata")
public void testSymmetricLOD(final double[] llikelihoods1, final double[] llikelihoods2) {
    final double arrayTolerance = .00001;

    final HaplotypeBlock block = new HaplotypeBlock(0.1);
    final Snp snp = new Snp("test", "chrTest", 1, (byte) 'A', (byte) 'C', .1, Collections.emptyList());
    block.addSnp(snp);

    final HaplotypeProbabilitiesFromGenotypeLikelihoods first = new HaplotypeProbabilitiesFromGenotypeLikelihoods(block);
    final HaplotypeProbabilitiesFromGenotypeLikelihoods second = new HaplotypeProbabilitiesFromGenotypeLikelihoods(block);

    first.addToLogLikelihoods(snp, CollectionUtil.makeList(Allele.ALT_A, Allele.REF_C), llikelihoods1);
    second.addToLogLikelihoods(snp, CollectionUtil.makeList(Allele.ALT_A, Allele.REF_C), llikelihoods2);

    // Posterior of the first object versus normalized (likelihood * haplotype frequency).
    final double[] actualFirst = MathUtil.pNormalizeVector(first.getPosteriorProbabilities());
    final double[] expectedFirst = MathUtil.pNormalizeVector(
            MathUtil.multiply(MathUtil.pNormalizeLogProbability(llikelihoods1), block.getHaplotypeFrequencies()));
    TestNGUtil.assertEqualDoubleArrays(actualFirst, expectedFirst, arrayTolerance);

    // Same check for the second object.
    final double[] actualSecond = MathUtil.pNormalizeVector(second.getPosteriorProbabilities());
    final double[] expectedSecond = MathUtil.pNormalizeVector(
            MathUtil.multiply(MathUtil.pNormalizeLogProbability(llikelihoods2), block.getHaplotypeFrequencies()));
    TestNGUtil.assertEqualDoubleArrays(actualSecond, expectedSecond, arrayTolerance);

    // The evidence probability must not depend on which object plays which role.
    final double ll21 = first.scaledEvidenceProbabilityUsingGenotypeFrequencies(second.getPosteriorLikelihoods());
    final double ll12 = second.scaledEvidenceProbabilityUsingGenotypeFrequencies(first.getPosteriorLikelihoods());

    final boolean symmetric = TestNGUtil.compareDoubleWithAccuracy(ll12, ll21, 0.001);
    Assert.assertTrue(symmetric, "found : " + ll12 + " and " + ll21);
}

Example #3

Source File: ReduceGtfTest.java From Drop-seq with MIT License

6 votes

/**
 * Parses GTF_FILE4, checks the number of records and genes built from it, and verifies
 * that every gene can be enhanced without returning null.
 */
@Test(enabled=true, groups={"dropseq", "transcriptome"})
public void testAPITD1() {
    final Iterator<GTFRecord> parsedRecords = parseGtf(GTF_FILE4);
    Assert.assertNotNull(parsedRecords);

    final Collection<GTFRecord> gtfRecords = CollectionUtil.makeCollection(parsedRecords);
    // Expected count comes from:
    // gunzip -c human_APITD1.gtf.gz | grep -v CDS |grep -v start_codon |grep -v stop_codon |wc -l
    Assert.assertEquals(gtfRecords.size(), 26);

    final GeneFromGTFBuilder builder = new GeneFromGTFBuilder(gtfRecords.iterator());
    final Collection<GeneFromGTF> builtGenes = CollectionUtil.makeCollection(builder);
    Assert.assertEquals(builtGenes.size(), 1);

    final EnhanceGTFRecords gtfEnhancer = new EnhanceGTFRecords();
    for (final GeneFromGTF builtGene : builtGenes) {
        Assert.assertNotNull(gtfEnhancer.enhanceGene(builtGene));
    }
}

Example #4

Source File: ReduceGtfTest.java From Drop-seq with MIT License

6 votes

/**
 * Parses GTF_FILE5, checks the number of records and genes built from it, and verifies
 * that every gene can be enhanced without returning null.
 */
@Test(enabled=true, groups={"dropseq", "transcriptome"})
public void testAPITD1Complex() {
    final Iterator<GTFRecord> parsedRecords = parseGtf(GTF_FILE5);
    Assert.assertNotNull(parsedRecords);

    final Collection<GTFRecord> gtfRecords = CollectionUtil.makeCollection(parsedRecords);
    // Expected count comes from:
    // gunzip -c human_APITD1.gtf.gz | grep -v CDS |grep -v start_codon |grep -v stop_codon |wc -l
    Assert.assertEquals(gtfRecords.size(), 42);

    final GeneFromGTFBuilder builder = new GeneFromGTFBuilder(gtfRecords.iterator());
    final Collection<GeneFromGTF> builtGenes = CollectionUtil.makeCollection(builder);
    Assert.assertEquals(builtGenes.size(), 2);

    final EnhanceGTFRecords gtfEnhancer = new EnhanceGTFRecords();
    for (final GeneFromGTF builtGene : builtGenes) {
        Assert.assertNotNull(gtfEnhancer.enhanceGene(builtGene));
    }
}

Example #5

Source File: RExecutor.java From hmftools with GNU General Public License v3.0

6 votes

/**
 * Runs the given R script file through R_EXE and blocks until the process exits.
 * Standard output and standard error of the process are redirected to freshly created
 * temporary files whose names are prefixed with {@code rScriptName}.
 *
 * @param rScriptName prefix used for the temporary output and error files
 * @param scriptFile  the script to execute
 * @param arguments   extra command-line arguments passed after the script path
 * @return the exit code of the R process
 * @throws IOException          if a temporary file cannot be created or the process cannot be started
 * @throws InterruptedException if the thread is interrupted while waiting for the process
 */
private static int executeFromFile(final String rScriptName, final File scriptFile, final String... arguments)
        throws IOException, InterruptedException {
    // Command layout: executable, script path, then the caller's arguments.
    final String[] command = new String[arguments.length + 2];
    command[0] = R_EXE;
    command[1] = scriptFile.getAbsolutePath();
    System.arraycopy(arguments, 0, command, 2, arguments.length);

    final File outputFile = File.createTempFile(rScriptName, ".out");
    final File errorFile = File.createTempFile(rScriptName, ".error");

    final String commandLine = CollectionUtil.join(Arrays.asList(command), " ");
    LOGGER.info(String.format("Executing R script via command: %s", commandLine));

    final ProcessBuilder processBuilder = new ProcessBuilder(command);
    processBuilder.redirectError(errorFile);
    processBuilder.redirectOutput(outputFile);
    final Process rProcess = processBuilder.start();

    final int exitCode = rProcess.waitFor();
    if (exitCode != 0) {
        LOGGER.fatal("Error executing R script. Examine error file {} for details.", errorFile.toString());
    }
    return exitCode;
}

Example #6

Source File: HaplotypeProbabilitiesTest.java From picard with MIT License

6 votes

/**
 * Adds the supplied log-likelihoods to {@code hp} (with alleles swapped where requested),
 * recomputes the expected posterior by hand, and compares the two.
 */
@Test(dataProvider = "dataTestpEvidenceGivenPriorFromGLs")
public void testpEvidenceGivenPriorFromGLs(final HaplotypeProbabilitiesFromGenotypeLikelihoods hp, final List<Snp> snps, final List<Boolean> swaps, final List<double[]> logLikelihoods) {

    for (int snpIndex = 0; snpIndex < snps.size(); ++snpIndex) {
        final boolean swapped = swaps.get(snpIndex);
        final Snp snp = snps.get(snpIndex);

        // When swapped, the two alleles trade places.
        final Allele first = Allele.create(swapped ? snp.getAllele2() : snp.getAllele1());
        final Allele second = Allele.create(swapped ? snp.getAllele1() : snp.getAllele2());

        hp.addToLogLikelihoods(snp, CollectionUtil.makeList(first, second), logLikelihoods.get(snpIndex));
    }

    final double[] expectedLogPosterior = new double[nGenotypes];
    for (final int genotype : genotypes) {
        // Start from the log of the haplotype frequency, then add each SNP's log-likelihood.
        double logSum = log10(hp.getHaplotype().getHaplotypeFrequency(genotype));
        for (int snpIndex = 0; snpIndex < logLikelihoods.size(); snpIndex++) {
            final double[] perGenotype = logLikelihoods.get(snpIndex);
            Assert.assertEquals(perGenotype.length, nGenotypes);

            // A swapped SNP reads its likelihood from the mirrored genotype index.
            final int sourceGenotype;
            if (swaps.get(snpIndex)) {
                sourceGenotype = nGenotypes - genotype - 1;
            } else {
                sourceGenotype = genotype;
            }
            logSum += perGenotype[sourceGenotype];
        }
        expectedLogPosterior[genotype] = logSum;
    }
    assertEqualDoubleArrays(hp.getPosteriorProbabilities(), MathUtil.pNormalizeLogProbability(expectedLogPosterior), 1e-10);
}

Example #7

Source File: BclQualityEvaluationStrategy.java From picard with MIT License

6 votes

/**
 * Checks every quality recorded so far and fails if any of them, once revised, falls
 * below the minimum revised quality.
 *
 * @throws PicardException listing each offending quality and how often it was observed
 */
public void assertMinimumQualities() {
    final Collection<String> violations = new LinkedList<>();
    for (final Map.Entry<Byte, AtomicInteger> observation : this.qualityCountMap.entrySet()) {
        final Byte observedQuality = observation.getKey();
        // qualityCountMap is keyed by observed quality, but the threshold applies to revised
        // quality, so convert before comparing.
        final boolean belowThreshold = generateRevisedQuality(observedQuality) < minimumRevisedQuality;
        if (belowThreshold) {
            violations.add(String.format("quality %s observed %s times", observedQuality, observation.getValue()));
        }
    }

    if (violations.isEmpty()) {
        return;
    }

    final String joinedViolations = CollectionUtil.join(violations, "; ");
    throw new PicardException(String.format(
            "Found BCL qualities that fell beneath minimum threshold of %s: %s.",
            minimumRevisedQuality,
            joinedViolations
    ));
}

Example #8

Source File: TileMetricsUtil.java From picard with MIT License

6 votes

/**
 * Creates one Tile for each location whose single metrics record is a cluster record.
 *
 * @param locationToMetricsMap metrics grouped by location; each group is read via
 *                             CollectionUtil.getSoleElement
 * @param phasingValues        phasing values looked up by lane number, then tile number
 * @param density              density value passed to every created Tile
 * @return an unmodifiable collection of the created tiles
 */
private static Collection<Tile> getTileClusterRecordsV3(
        final Map<String, ? extends Collection<IlluminaTileMetrics>> locationToMetricsMap,
        final Map<Integer, Map<Integer, Collection<TilePhasingValue>>> phasingValues,
        final float density) {

    final Collection<Tile> clusterTiles = new LinkedList<>();
    for (final Collection<IlluminaTileMetrics> metricsAtLocation : locationToMetricsMap.values()) {
        final IlluminaTileMetrics metrics = CollectionUtil.getSoleElement(metricsAtLocation);

        // Anything that is not a cluster record produces no tile.
        if (!metrics.isClusterRecord()) {
            continue;
        }

        final Collection<TilePhasingValue> phasingForTile =
                phasingValues.get(metrics.getLaneNumber()).get(metrics.getTileNumber());
        final TilePhasingValue[] phasingArray =
                phasingForTile.toArray(new TilePhasingValue[phasingForTile.size()]);

        clusterTiles.add(new Tile(metrics.getLaneNumber(), metrics.getTileNumber(), density,
                metrics.getMetricValue(), phasingArray));
    }
    return Collections.unmodifiableCollection(clusterTiles);
}

Example #9

Source File: IntervalListToBed.java From picard with MIT License

6 votes

/**
 * Reads the interval list from INPUT (sorting it first when SORT is set) and writes one
 * tab-separated line per interval to OUTPUT with the fields: contig, start - 1, end, name,
 * SCORE, strand ("-" for negative-strand intervals, "+" otherwise).
 *
 * @return 0 on success
 * @throws RuntimeIOException if writing to or closing OUTPUT fails
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    IntervalList intervals = IntervalList.fromFile(INPUT);
    if (SORT) {
        intervals = intervals.sorted();
    }

    // try-with-resources closes the writer even when a write fails part-way through;
    // previously close() was only reached on the success path, leaking the file handle.
    try (BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT)) {
        for (final Interval i : intervals) {
            final String strand = i.isNegativeStrand() ? "-" : "+";
            // start - 1: presumably converts to BED's zero-based start coordinate.
            final List<?> fields = CollectionUtil.makeList(i.getContig(), i.getStart()-1, i.getEnd(), i.getName(), SCORE, strand);
            out.append(fields.stream().map(String::valueOf).collect(Collectors.joining("\t")));
            out.newLine();
        }
    }
    catch (IOException ioe) {
        throw new RuntimeIOException(ioe);
    }

    return 0;
}

Example #10

Source File: MarkDuplicatesTest.java From picard with MIT License

6 votes

/**
 * Runs MarkDuplicates on a single SAM file inside a temporary directory and checks both
 * the return code and the number of optical duplicates reported.
 */
@Test(dataProvider = "testOpticalDuplicateDetectionDataProvider")
public void testOpticalDuplicateDetection(final File sam, final long expectedNumOpticalDuplicates) {
    // Scratch locations; each is registered for deletion on JVM exit as a fallback.
    final File workDir = IOUtil.createTempDir(TEST_BASE_NAME + ".", ".tmp");
    workDir.deleteOnExit();
    final File markedSam = new File(workDir, TEST_BASE_NAME + ".sam");
    markedSam.deleteOnExit();
    final File duplicateMetrics = new File(workDir, TEST_BASE_NAME + ".duplicate_metrics");
    duplicateMetrics.deleteOnExit();

    // Configure MarkDuplicates with the one input SAM and the scratch outputs.
    final MarkDuplicates tool = new MarkDuplicates();
    tool.setupOpticalDuplicateFinder();
    tool.INPUT = CollectionUtil.makeList(sam.getAbsolutePath());
    tool.OUTPUT = markedSam;
    tool.METRICS_FILE = duplicateMetrics;
    tool.TMP_DIR = CollectionUtil.makeList(workDir);
    // Needed to suppress calling CommandLineProgram.getVersion(), which doesn't work for code not in a jar
    tool.PROGRAM_RECORD_ID = null;

    Assert.assertEquals(tool.doWork(), 0);
    Assert.assertEquals(tool.numOpticalDuplicates(), expectedNumOpticalDuplicates);

    IOUtil.recursiveDelete(workDir.toPath());
}

Example #11

Source File: IntervalListScattererWithSubdivision.java From picard with MIT License

5 votes

/**
 * Splits {@code interval} into the part that still fits into the current scatter and the
 * remainder. The result always has two elements: the piece to take (or null) followed by
 * the piece left over (or null).
 */
@Override
public List<Interval> takeSome(final Interval interval, final long idealSplitWeight, final long currentSize, final double projectSizeOfRemaining) {
    final long remainingCapacity = idealSplitWeight - currentSize;

    // The whole interval fits: take all of it, nothing is left over.
    if (remainingCapacity >= interval.length()) {
        return CollectionUtil.makeList(interval, null);
    }

    // No room at all: take nothing, the whole interval is left over.
    if (remainingCapacity == 0) {
        return CollectionUtil.makeList(null, interval);
    }

    // Otherwise cut the interval; the right-hand piece begins at splitStart.
    final int splitStart = interval.getStart() + (int) remainingCapacity;
    final Interval taken = new Interval(
            interval.getContig(),
            interval.getStart(),
            splitStart - 1,
            interval.isNegativeStrand(),
            interval.getName()
    );
    final Interval leftOver = new Interval(
            interval.getContig(),
            splitStart,
            interval.getEnd(),
            interval.isNegativeStrand(),
            interval.getName()
    );
    return CollectionUtil.makeList(taken, leftOver);
}

Example #12

Source File: GeneFromGTFBuilder.java From Drop-seq with MIT License

5 votes

/**
 * Builds a GeneFromGTF spanning all of the given records. Name, strand, chromosome, feature
 * type, gene ID, transcript type and gene version are taken from the first record; start and
 * end are the minimum start and maximum end over all records.
 *
 * @param gtfRecords the records of one gene; the first one is read unconditionally
 * @return the constructed gene
 * @throws AnnotationException if the records span more than one chromosome or gene ID
 */
private GeneFromGTF makeGeneFromGTFRecords(final Collection<GTFRecord> gtfRecords) {
    final GTFRecord firstRecord = gtfRecords.iterator().next();
    final String geneName = firstRecord.getGeneName();
    final boolean transcriptNegStrand = firstRecord.isNegativeStrand();

    // Figure out the extent of the gene
    int geneStart = Integer.MAX_VALUE;
    int geneEnd = Integer.MIN_VALUE;
    final Set<String> distinctGeneIds = new HashSet<>();
    final Set<String> distinctChromosomes = new HashSet<>();
    for (final GTFRecord record : gtfRecords) {
        geneStart = Math.min(geneStart, record.getStart());
        geneEnd = Math.max(geneEnd, record.getEnd());
        distinctGeneIds.add(record.getGeneID());
        distinctChromosomes.add(record.getChromosome());
    }

    if (distinctChromosomes.size() > 1) {
        throw new AnnotationException("Chromosome disagreement(" + CollectionUtil.join(distinctChromosomes, ", ") +
                ") in GTF file for gene " + geneName);
    }

    final GeneFromGTF gene = new GeneFromGTF(firstRecord.getChromosome(), geneStart, geneEnd, transcriptNegStrand,
            geneName, firstRecord.getFeatureType(), firstRecord.getGeneID(), firstRecord.getTranscriptType(),
            firstRecord.getGeneVersion());

    // Every record is validated against the gene before the gene-ID check below.
    for (final GTFRecord record : gtfRecords) {
        validateGTFRecord(record, gene);
    }

    if (distinctGeneIds.size() > 1) {
        throw new AnnotationException(String.format("Multiple gene IDs for gene %s: %s", geneName, CollectionUtil.join(distinctGeneIds, ", ")));
    }

    return gene;
}

Example #13

Source File: IntervalListScattererWithoutSubdivision.java From picard with MIT License

5 votes

/**
 * Decides whether {@code interval} goes into the current scatter as a whole; the interval
 * is never cut. The result always has two elements: the piece to take (or null) followed by
 * the piece left over (or null).
 */
@Override
public List<Interval> takeSome(final Interval interval, final long idealSplitWeight, final long currentSize, final double projectedSizeOfRemaining) {
    final long sizeWithInterval = currentSize + intervalWeight(interval);
    final boolean include = shouldIncludeInterval(idealSplitWeight, projectedSizeOfRemaining, sizeWithInterval);

    if (!include) {
        // Leave the whole interval for a later scatter.
        return CollectionUtil.makeList(null, interval);
    }
    return CollectionUtil.makeList(interval, null);
}

Example #14

Source File: RExecutor.java From picard with MIT License

5 votes

/**
 * Runs an R script stored in a file by invoking R_EXE with the script path followed by
 * the given arguments, and hands the command to ProcessExecutor.
 *
 * @param scriptFile the file object for the script
 * @param arguments any arguments required by the script
 * @return the value returned by ProcessExecutor.execute for the command
 */
public static int executeFromFile(final File scriptFile, final String... arguments) {
    // Command layout: executable, script path, then the caller's arguments.
    final String[] command = new String[arguments.length + 2];
    command[0] = R_EXE;
    command[1] = scriptFile.getAbsolutePath();
    System.arraycopy(arguments, 0, command, 2, arguments.length);

    final String commandLine = CollectionUtil.join(Arrays.asList(command), " ");
    LOG.info(String.format("Executing R script via command: %s", commandLine));

    return ProcessExecutor.execute(command);
}

Example #15

Source File: ReadNameParserTests.java From picard with MIT License

5 votes

/** Checks that rapidParseInt throws NumberFormatException for inputs that do not start with a number. */
@Test
public void testRapidParseIntFails() {
    final List<String> unparsable = CollectionUtil.makeList("foo", "bar", "abc123", "-foo", "f00", "-f00");
    for (final String candidate : unparsable) {
        boolean rejected = false;
        try {
            ReadNameParser.rapidParseInt(candidate);
        }
        catch (NumberFormatException expected) {
            // This is the outcome the test is looking for.
            rejected = true;
        }
        if (!rejected) {
            Assert.fail("Should have failed to rapid-parse " + candidate + " as an int.");
        }
    }
}

Example #16