Java Code Examples for htsjdk.samtools.util.IOUtil#getFilesMatchingRegexp()

The following examples show how to use htsjdk.samtools.util.IOUtil#getFilesMatchingRegexp(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: MultiTileBclFileUtil.java    From picard with MIT License 6 votes vote down vote up
/**
 * Creates the file util for the cycle files found in a lane directory.
 * <p>
 * Locates the lane's {@code s_<lane>.bci} file inside {@code basecallLaneDir}. When that file
 * exists, a {@link TileIndex} is loaded from it and every tile it reports is mapped to each
 * cycle file matching {@code matchPattern} under {@code base}; the per-cycle maps are stored in
 * {@code cycleFileMap}, keyed by the digits that precede the first '.' in the file name.
 * When the {@code .bci} file is absent, {@code tileIndex} is left {@code null} and no cycle
 * files are registered.
 *
 * @param basecallLaneDir directory expected to hold the {@code .bci} file; also handed to the superclass
 * @param lane            lane number used to build the {@code .bci} file name
 */
MultiTileBclFileUtil(final File basecallLaneDir, final int lane) {
    // These file names do not contain the lane number.
    // NOTE(review): an earlier comment said the first two ctor args "are the same", but the
    // pattern and the extension passed below differ - confirm which statement is intended.
    super("^(\\d{4}).bcl.bgzf$", ".bcl.bgzf", basecallLaneDir,
            new MultiTileBclFileFaker(), lane);
    this.basecallLaneDir = basecallLaneDir;
    bci = new File(basecallLaneDir, "s_" + lane + ".bci");
    // The directory is listed exactly once, up front, and the result is reused below.
    final File[] cycleFiles = IOUtil.getFilesMatchingRegexp(base, matchPattern);
    if (!bci.exists()) {
        tileIndex = null;
    } else {
        tileIndex = new TileIndex(bci);
        // A null listing means there is nothing to register.
        if (cycleFiles != null) {
            for (final File cycleFile : cycleFiles) {
                final String cycleFileName = cycleFile.getName();
                final int firstDot = cycleFileName.indexOf('.');
                final String cyclePrefix = cycleFileName.substring(0, firstDot);
                // Every tile listed in the index points at this same cycle file.
                final IlluminaFileMap tilesToFile = new IlluminaFileMap();
                for (final Integer tile : tileIndex.getTiles()) {
                    tilesToFile.put(tile, cycleFile);
                }
                cycleFileMap.put(Integer.valueOf(cyclePrefix), tilesToFile);
            }
        }
    }
}
 
Example 2
Source File: MultiTileFileUtil.java    From picard with MIT License 6 votes vote down vote up
/**
 * Creates the file util for a single data file that covers multiple tiles.
 * <p>
 * Loads a {@link TileIndex} from {@code s_<lane>.bci} in {@code bciDir} when that file exists
 * (otherwise {@code tileIndex} is {@code null}), then looks in {@code base} for files matching
 * {@code matchPattern}: none leaves {@code dataFile} {@code null}, exactly one becomes
 * {@code dataFile}, and more than one is rejected.
 *
 * @param extension file extension, forwarded to the superclass
 * @param base      directory searched for the data file
 * @param bciDir    directory expected to contain the {@code .bci} file
 * @param fileFaker faker forwarded to the superclass
 * @param lane      lane number used to build the {@code .bci} file name
 * @throws PicardException if more than one matching file is found in {@code base}
 */
MultiTileFileUtil(final String extension, final File base, final File bciDir, final FileFaker fileFaker,
                  final int lane) {
    super(false, extension, base, fileFaker, lane);
    bci = new File(bciDir, "s_" + lane + ".bci");
    tileIndex = bci.exists() ? new TileIndex(bci) : null;

    final File[] candidates = IOUtil.getFilesMatchingRegexp(base, matchPattern);
    // A null listing is treated exactly like an empty one.
    final int candidateCount = (candidates == null) ? 0 : candidates.length;
    if (candidateCount > 1) {
        throw new PicardException("More than one filter file found in " + base.getAbsolutePath());
    }
    dataFile = (candidateCount == 1) ? candidates[0] : null;
}
 
Example 3
Source File: MultiTileFileUtil.java    From picard with MIT License 6 votes vote down vote up
/**
 * Fakes the {@code .bci} file and the data file for the expected tiles, then reloads
 * {@code tileIndex} and {@code dataFile} from what is now on disk.
 *
 * @param expectedTiles  tiles the faked index and data file should cover
 * @param expectedCycles not read by this implementation
 * @param format         not read by this implementation
 * @return a single-element list describing the failure if an {@link IOException} occurs while
 *         faking or reloading; otherwise whatever {@code tileIndex.verify(expectedTiles)} returns
 * @throws PicardException if more than one matching data file is found in {@code base}
 */
@Override
public List<String> fakeFiles(final List<Integer> expectedTiles, final int[] expectedCycles,
                              final IlluminaFileUtil.SupportedIlluminaFormat format) {
    try {
        // The tile index needs a bci file to read from, so fake that first.
        new BciFileFaker().fakeBciFile(bci, expectedTiles);
        tileIndex = new TileIndex(bci);
        faker.fakeFile(base, expectedTiles, lane, extension);

        final File[] candidates = IOUtil.getFilesMatchingRegexp(base, matchPattern);
        // A null listing is treated exactly like an empty one.
        final int candidateCount = (candidates == null) ? 0 : candidates.length;
        if (candidateCount > 1) {
            throw new PicardException("More than one filter file found in " + base.getAbsolutePath());
        }
        dataFile = (candidateCount == 1) ? candidates[0] : null;
    } catch (final IOException e) {
        return Collections.singletonList("Could not create tile index file: " + bci.getAbsolutePath());
    }
    return tileIndex.verify(expectedTiles);
}
 
Example 4
Source File: TileMetricsUtil.java    From picard with MIT License 6 votes vote down vote up
/**
 * Collects the per-cycle empirical phasing metrics files of a run.
 * <p>
 * Lists the cycle subdirectories of the {@code INTEROP_SUBDIRECTORY_NAME} directory under
 * {@code illuminaRunDirectory} and, for each one that contains a file matching
 * {@code EmpiricalPhasingMetricsOut.bin}, records the first match under that directory's
 * cycle number.
 *
 * @param illuminaRunDirectory run directory containing the InterOp subdirectory
 * @return a mutable map from cycle number to phasing metrics file; empty when the InterOp
 *         directory cannot be listed or no cycle directory holds a metrics file
 */
public static Map<Integer, File> renderPhasingMetricsFilesFromBasecallingDirectory(File illuminaRunDirectory) {
    final Map<Integer, File> phasingMetrics = new HashMap<>();

    final File[] cycleDirs = IOUtil.getFilesMatchingRegexp(new File(illuminaRunDirectory, INTEROP_SUBDIRECTORY_NAME),
            IlluminaFileUtil.CYCLE_SUBDIRECTORY_PATTERN);
    // The listing is null when the InterOp directory is missing or unreadable; report
    // "no metrics" instead of failing with a NullPointerException.
    if (cycleDirs == null) {
        return phasingMetrics;
    }

    for (final File cycleDir : cycleDirs) {
        final File[] metricsFiles = IOUtil.getFilesMatchingRegexp(
                cycleDir, "EmpiricalPhasingMetricsOut.bin");
        // A matched entry that is not a listable directory also yields null; skip it.
        if (metricsFiles != null && metricsFiles.length > 0) {
            phasingMetrics.put(PerTilePerCycleFileUtil.getCycleFromDir(cycleDir),
                    metricsFiles[0]);
        }
    }
    return phasingMetrics;
}
 
Example 5
Source File: IlluminaFileUtil.java    From picard with MIT License 6 votes vote down vote up
/**
 * Reports whether any cycle subdirectory of the given lane contains a CBCL file.
 *
 * @param basecallDir directory containing the lane subdirectories
 * @param lane        lane number; its long-form name is used both as the lane directory name
 *                    and as the prefix of the CBCL file names searched for
 * @return {@code true} if at least one file named {@code <longLane>_<1-5 digits>.cbcl} exists
 *         in some cycle directory; {@code false} otherwise, including when the lane directory
 *         cannot be listed
 */
public static boolean hasCbcls(final File basecallDir, final int lane) {
    final String laneName = IlluminaFileUtil.longLaneStr(lane);
    final File laneDir = new File(basecallDir, laneName);
    final File[] cycleDirs = IOUtil.getFilesMatchingRegexp(laneDir, IlluminaFileUtil.CYCLE_SUBDIRECTORY_PATTERN);

    // Either the lane or the cycle directory do not exist!
    if (cycleDirs == null) {
        return false;
    }

    // The regex is the same for every cycle directory, so build it once.
    final String cbclRegex = "^" + laneName + "_(\\d{1,5}).cbcl$";
    for (final File cycleDir : cycleDirs) {
        final File[] cbcls = IOUtil.getFilesMatchingRegexp(cycleDir, cbclRegex);
        // A null listing (entry is not a listable directory) means no CBCLs here; the first
        // hit answers the question, so stop scanning.
        if (cbcls != null && cbcls.length > 0) {
            return true;
        }
    }
    return false;
}
 
Example 6
Source File: PerTilePerCycleFileUtil.java    From picard with MIT License 6 votes vote down vote up
/**
 * Builds a {@link CycleIlluminaFileMap} holding, for every cycle subdirectory found under
 * {@code base}, the tiled files in that directory that match {@code matchPattern}.
 * <p>
 * Side effects: the cycle number of every cycle directory found is added to
 * {@code detectedCycles}, and {@code tiles} is replaced by the distinct tile numbers seen
 * across all cycle directories. When no cycle directory is found, neither field is touched.
 *
 * @return the populated map, keyed by cycle number; empty when {@code base} has no cycle
 *         subdirectories or cannot be listed
 */
protected CycleIlluminaFileMap getPerTilePerCycleFiles() {
    final CycleIlluminaFileMap filesByCycle = new CycleIlluminaFileMap();

    final File[] cycleDirs = IOUtil.getFilesMatchingRegexp(base, IlluminaFileUtil.CYCLE_SUBDIRECTORY_PATTERN);
    final boolean noCycleDirs = cycleDirs == null || cycleDirs.length == 0;
    if (noCycleDirs) {
        return filesByCycle;
    }

    // First pass: record every cycle before any per-directory file lookup happens.
    for (final File dir : cycleDirs) {
        detectedCycles.add(getCycleFromDir(dir));
    }

    // Second pass: gather the tiled files of each cycle and the union of their tiles.
    final Set<Integer> distinctTiles = new HashSet<>();
    for (final File dir : cycleDirs) {
        final IlluminaFileMap tiledFiles = getTiledFiles(dir, matchPattern);
        distinctTiles.addAll(tiledFiles.keySet());
        filesByCycle.put(getCycleFromDir(dir), tiledFiles);
    }

    this.tiles = new ArrayList<>(distinctTiles);
    return filesByCycle;
}
 
Example 7
Source File: ParameterizedFileUtil.java    From picard with MIT License 5 votes vote down vote up
/**
 * Finds the single file in {@code baseDirectory} whose name matches {@code pattern}.
 *
 * @param baseDirectory directory to search; checked for readability when it exists
 * @param pattern       pattern the file name must match
 * @return the matching file when exactly one exists; {@code null} when the directory does not
 *         exist or when zero or several files match
 */
protected File getRunFile(final File baseDirectory, final Pattern pattern) {
    if (!baseDirectory.exists()) {
        return null;
    }
    IOUtil.assertDirectoryIsReadable(baseDirectory);
    final File[] matches = IOUtil.getFilesMatchingRegexp(baseDirectory, pattern);
    // Only an unambiguous single match counts.
    return matches.length == 1 ? matches[0] : null;
}
 
Example 8
Source File: ParameterizedFileUtil.java    From picard with MIT License 5 votes vote down vote up
/**
 * Collects the files in {@code baseDirectory} whose names match {@code pattern}, keyed by the
 * tile that {@code fileToTile} derives from each file name.
 *
 * @param baseDirectory directory to search; checked for readability when it exists
 * @param pattern       pattern the file names must match
 * @return a map from tile to file; empty when the directory does not exist. Zero-length files
 *         are left out when {@code skipEmptyFiles} is set
 */
protected IlluminaFileMap getTiledFiles(final File baseDirectory, final Pattern pattern) {
    final IlluminaFileMap tileToFile = new IlluminaFileMap();
    if (!baseDirectory.exists()) {
        return tileToFile;
    }

    IOUtil.assertDirectoryIsReadable(baseDirectory);
    for (final File candidate : IOUtil.getFilesMatchingRegexp(baseDirectory, pattern)) {
        final boolean skip = skipEmptyFiles && candidate.length() <= 0;
        if (skip) {
            continue;
        }
        tileToFile.put(fileToTile(candidate.getName()), candidate);
    }
    return tileToFile;
}
 
Example 9
Source File: NewIlluminaBasecallsConverter.java    From picard with MIT License 4 votes vote down vote up
/**
 * Lists the files in {@code baseDirectory} whose names match {@code pattern}.
 * Pure delegate to {@code IOUtil.getFilesMatchingRegexp}; the result is passed through untouched.
 *
 * @param baseDirectory directory to search
 * @param pattern       pattern the file names must match
 * @return the matching files exactly as returned by {@code IOUtil}
 *         (NOTE(review): possibly {@code null} when the directory cannot be listed - confirm)
 */
public static File[] getTiledFiles(final File baseDirectory, final Pattern pattern) {
    final File[] matches = IOUtil.getFilesMatchingRegexp(baseDirectory, pattern);
    return matches;
}
 
Example 10
Source File: ExtractIlluminaBarcodesTest.java    From picard with MIT License 4 votes vote down vote up
/**
 * Runs barcode extraction on lane 1 with read structure {@code 25T8B25T} and checks the
 * per-barcode metrics, the first three records of the generated barcode files, and finally
 * barcode-informed basecall parsing.
 * <p>
 * 4 cases tested:
 * * exact match to ACAGTG
 * * inexact match within threshold to TGACCA
 * * inexact match not within threshold to TGACCA
 * * inexact match where the next match is too close to ACAGTG
 * <p>
 * NOTE(review): the barcode sequences named above do not appear in the assertions below
 * (which check CAACTCTC and ACAGGTAT) - confirm whether this list is still accurate.
 *
 * @throws Exception if running the extraction or reading its output fails
 */
@Test
public void testBarcodeMatching() throws Exception {
    final int lane = 1;
    final int barcodePosition = 26;
    final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> metricsFile = runIt(lane, "25T8B25T");

    ExtractIlluminaBarcodes.BarcodeMetric metricOne = null;
    ExtractIlluminaBarcodes.BarcodeMetric metricTwo = null;
    ExtractIlluminaBarcodes.BarcodeMetric metricNoMatch = null;
    for (final ExtractIlluminaBarcodes.BarcodeMetric metric : metricsFile.getMetrics()) {
        if (metric.BARCODE.equals(BARCODES[0])) {
            metricOne = metric;
        } else if (metric.BARCODE.equals(BARCODES[2])) {
            metricTwo = metric;
        } else if (metric.BARCODE.equals("NNNNNNNN")) {
            metricNoMatch = metric;
        }
    }

    // Fail with a readable message, not a NullPointerException, when a metric row is missing.
    Assert.assertNotNull(metricOne, "No metric found for barcode " + BARCODES[0]);
    Assert.assertNotNull(metricTwo, "No metric found for barcode " + BARCODES[2]);
    Assert.assertNotNull(metricNoMatch, "No metric found for the no-match barcode NNNNNNNN");

    Assert.assertEquals(metricOne.PERFECT_MATCHES, 5);
    Assert.assertEquals(metricOne.ONE_MISMATCH_MATCHES, 0);
    Assert.assertEquals(metricOne.PF_READS, 3);
    Assert.assertEquals(metricOne.READS, 5);

    // one inexact match
    // NOTE(review): the assertion below expects ONE_MISMATCH_MATCHES == 0 - confirm the comment above.
    Assert.assertEquals(metricTwo.READS, 4);
    Assert.assertEquals(metricTwo.ONE_MISMATCH_MATCHES, 0);

    Assert.assertEquals(metricNoMatch.READS, 140);
    Assert.assertEquals(metricNoMatch.PF_READS, 112);

    // Check the barcode files themselves
    final File[] barcodeFiles = IOUtil.getFilesMatchingRegexp(basecallsDir, "s_" + lane + "_\\d{4}_barcode.txt");
    // A null listing would otherwise fail inside Arrays.sort with no hint about the cause.
    Assert.assertNotNull(barcodeFiles, "Could not list barcode files in " + basecallsDir);
    // Sort so the records are read in a deterministic file order.
    Arrays.sort(barcodeFiles);

    final BasicInputParser barcodeParser = new BasicInputParser(true, barcodeFiles);
    try {
        // Exact match
        String[] illuminaFields = barcodeParser.next();
        Assert.assertEquals(illuminaFields[1], "Y");
        Assert.assertEquals(illuminaFields[2], "CAACTCTC");

        // Inexact match
        illuminaFields = barcodeParser.next();
        Assert.assertEquals(illuminaFields[1], "Y");
        Assert.assertEquals(illuminaFields[2], "ACAGGTAT");

        // Too many mismatches
        illuminaFields = barcodeParser.next();
        Assert.assertEquals(illuminaFields[1], "N");
    } finally {
        // Release the parser even when one of the assertions above fails.
        barcodeParser.close();
    }

    // Tack on test of barcode-informed Illumina Basecall parsing
    final ReadStructure rs = new ReadStructure("25T8B25T");
    final IlluminaDataProviderFactory factory = new IlluminaDataProviderFactory(basecallsDir, lane, rs,
            new BclQualityEvaluationStrategy(BclQualityEvaluationStrategy.ILLUMINA_ALLEGED_MINIMUM_QUALITY),
            IlluminaDataType.BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.Barcodes);
    testParsing(factory, rs, metricOne, barcodePosition);
}