Java Code Examples for org.deeplearning4j.spark.util.SparkUtils#listPaths()

The following examples show how to use org.deeplearning4j.spark.util.SparkUtils#listPaths() . These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Entry point: preprocesses the TinyImageNet train/test images into Spark file batches.
 * Parses CLI args into this instance's annotated fields, then lists image files under
 * {@code sourceDir} and writes batched files to {@code saveDir}.
 *
 * @param args Command-line arguments, parsed by JCommander into this object's fields
 * @throws Exception If argument parsing or preprocessing fails
 */
protected void entryPoint(String[] args) throws Exception {
    JCommander jcmdr = new JCommander(this);
    jcmdr.parse(args);

    SparkConf conf = new SparkConf();
    conf.setMaster("local[*]");
    conf.setAppName("DL4JTinyImageNetSparkPreproc");

    // JavaSparkContext is Closeable: try-with-resources guarantees the context is
    // stopped even when preprocessing throws (the original never closed it).
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
        //Create training set: recursively list all files with allowed image formats
        JavaRDD<String> filePathsTrain = SparkUtils.listPaths(sc, sourceDir + "/train", true, NativeImageLoader.ALLOWED_FORMATS);
        SparkDataUtils.createFileBatchesSpark(filePathsTrain, saveDir, batchSize, sc);

        //Create test set
        // NOTE(review): train and test batches are both written to the same saveDir —
        // confirm this is intended; otherwise the two sets may be interleaved/overwritten.
        JavaRDD<String> filePathsTest = SparkUtils.listPaths(sc, sourceDir + "/test", true, NativeImageLoader.ALLOWED_FORMATS);
        SparkDataUtils.createFileBatchesSpark(filePathsTest, saveDir, batchSize, sc);

        System.out.println("----- Data Preprocessing Complete -----");
    }
}
 
Example 2
/**
 * Entry point: preprocesses the TinyImageNet train/test images into Spark file batches.
 * Parses CLI args into this instance's annotated fields, then lists image files under
 * {@code sourceDir} and writes batched files to {@code saveDir}.
 *
 * @param args Command-line arguments, parsed by JCommander into this object's fields
 * @throws Exception If argument parsing or preprocessing fails
 */
protected void entryPoint(String[] args) throws Exception {
    JCommander jcmdr = new JCommander(this);
    jcmdr.parse(args);

    SparkConf conf = new SparkConf();
    conf.setMaster("local[*]");
    conf.setAppName("DL4JTinyImageNetSparkPreproc");

    // JavaSparkContext is Closeable: try-with-resources guarantees the context is
    // stopped even when preprocessing throws (the original never closed it).
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
        //Create training set: recursively list all files with allowed image formats
        JavaRDD<String> filePathsTrain = SparkUtils.listPaths(sc, sourceDir + "/train", true, NativeImageLoader.ALLOWED_FORMATS);
        SparkDataUtils.createFileBatchesSpark(filePathsTrain, saveDir, batchSize, sc);

        //Create test set
        // NOTE(review): train and test batches are both written to the same saveDir —
        // confirm this is intended; otherwise the two sets may be interleaved/overwritten.
        JavaRDD<String> filePathsTest = SparkUtils.listPaths(sc, sourceDir + "/test", true, NativeImageLoader.ALLOWED_FORMATS);
        SparkDataUtils.createFileBatchesSpark(filePathsTest, saveDir, batchSize, sc);

        System.out.println("----- Data Preprocessing Complete -----");
    }
}
 
Example 3
/**
 * Validates every serialized DataSet found under {@code path} against the expected
 * feature/label shapes, then aggregates the per-file results into a single summary.
 *
 * @param path          Directory to search for DataSet files
 * @param recursive     Whether to descend into subdirectories
 * @param deleteInvalid Whether files that fail validation should be deleted
 * @param featuresShape Expected features array shape (null to skip the check)
 * @param labelsShape   Expected labels array shape (null to skip the check)
 * @return Aggregated validation result across all files
 */
protected static ValidationResult validateDataSets(JavaSparkContext sc, String path, boolean recursive, boolean deleteInvalid,
                                            int[] featuresShape, int[] labelsShape) {
    final JavaRDD<String> dataSetPaths;
    try {
        dataSetPaths = SparkUtils.listPaths(sc, path, recursive);
    } catch (IOException e) {
        throw new RuntimeException("Error listing paths in directory", e);
    }

    // Validate each file independently, then fold the per-file results together
    ValidateDataSetFn validator = new ValidateDataSetFn(deleteInvalid, featuresShape, labelsShape);
    return dataSetPaths.map(validator).reduce(new ValidationResultReduceFn());
}
 
Example 4
/**
 * Validates every serialized MultiDataSet found under {@code path} against the expected
 * array counts and shapes, then aggregates the per-file results into a single summary.
 *
 * @param path             Directory to search for MultiDataSet files
 * @param recursive        Whether to descend into subdirectories
 * @param deleteInvalid    Whether files that fail validation should be deleted
 * @param numFeatureArrays Expected number of feature arrays
 * @param numLabelArrays   Expected number of label arrays
 * @param featuresShape    Expected shapes of the feature arrays (null to skip the check)
 * @param labelsShape      Expected shapes of the label arrays (null to skip the check)
 * @return Aggregated validation result across all files
 */
protected static ValidationResult validateMultiDataSets(JavaSparkContext sc, String path, boolean recursive, boolean deleteInvalid,
                                                 int numFeatureArrays, int numLabelArrays,
                                                 List<int[]> featuresShape, List<int[]> labelsShape) {
    final JavaRDD<String> dataSetPaths;
    try {
        dataSetPaths = SparkUtils.listPaths(sc, path, recursive);
    } catch (IOException e) {
        throw new RuntimeException("Error listing paths in directory", e);
    }

    // Validate each file independently, then fold the per-file results together
    ValidateMultiDataSetFn validator = new ValidateMultiDataSetFn(deleteInvalid, numFeatureArrays, numLabelArrays,
            featuresShape, labelsShape);
    return dataSetPaths.map(validator).reduce(new ValidationResultReduceFn());
}
 
Example 5
/**
 * Fit the SparkDl4jMultiLayer network using a directory of serialized DataSet objects
 * The assumption here is that the directory contains a number of {@link DataSet} objects, each serialized using
 * {@link DataSet#save(OutputStream)}
 *
 * @param path Path to the directory containing the serialized DataSet objects
 * @return The MultiLayerNetwork after training
 * @throws RuntimeException If the directory contents cannot be listed (wraps the underlying IOException)
 */
public MultiLayerNetwork fit(String path) {
    // Flush any queued ND4J ops before training begins, when a grid executioner is in use
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

    JavaRDD<String> paths;
    try {
        paths = SparkUtils.listPaths(sc, path);
    } catch (IOException e) {
        throw new RuntimeException("Error listing paths in directory", e);
    }

    // Delegate actual training to the path-based fit implementation
    return fitPaths(paths);
}
 
Example 6
/**
 * Evaluate on a directory containing a set of DataSet objects to be loaded with a {@link DataSetLoader}.
 *
 * @param path      Path/URI to the directory containing the datasets to load
 * @param batchSize Minibatch size to use when performing evaluation
 * @param loader    Loader used to deserialize each DataSet file
 * @return Evaluation
 * @throws RuntimeException If the directory contents cannot be listed (wraps the underlying IOException)
 */
@SuppressWarnings("unchecked")  // doEvaluation returns the evaluation array; element 0 is the Evaluation passed in
public <T extends Evaluation> T evaluate(String path, int batchSize, DataSetLoader loader){
    JavaRDD<String> paths;
    try {
        paths = SparkUtils.listPaths(sc, path);
    } catch (IOException e) {
        throw new RuntimeException("Error listing paths in directory", e);
    }

    // Lazily load each DataSet file on the executors via the provided loader
    JavaRDD<DataSet> rdd = paths.map(new LoadDataSetFunction(loader, new RemoteFileSourceFactory(BroadcastHadoopConfigHolder.get(sc))));
    return (T) doEvaluation(rdd, batchSize, new org.deeplearning4j.eval.Evaluation())[0];
}
 
Example 7
/**
 * Fit the SparkComputationGraph network using a directory of serialized DataSet objects
 * The assumption here is that the directory contains a number of {@link DataSet} objects, each serialized using
 * {@link DataSet#save(OutputStream)}
 *
 * @param path Path to the directory containing the serialized DataSet objects
 * @return The ComputationGraph after training
 * @throws RuntimeException If the directory contents cannot be listed (wraps the underlying IOException)
 */
public ComputationGraph fit(String path) {
    // Flush any queued ND4J ops before training begins, when a grid executioner is in use
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

    JavaRDD<String> paths;
    try {
        paths = SparkUtils.listPaths(sc, path);
    } catch (IOException e) {
        throw new RuntimeException("Error listing paths in directory", e);
    }

    // Delegate actual training to the path-based fit implementation
    return fitPaths(paths);
}
 
Example 8
/**
 * Fit the SparkComputationGraph network using a directory of serialized MultiDataSet objects
 * The assumption here is that the directory contains a number of serialized {@link MultiDataSet} objects
 *
 * @param path Path to the directory containing the serialized MultiDataSet objects
 * @return The ComputationGraph after training
 * @throws RuntimeException If the directory contents cannot be listed (wraps the underlying IOException)
 */
public ComputationGraph fitMultiDataSet(String path) {
    // Flush any queued ND4J ops before training begins, when a grid executioner is in use
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

    JavaRDD<String> paths;
    try {
        paths = SparkUtils.listPaths(sc, path);
    } catch (IOException e) {
        throw new RuntimeException("Error listing paths in directory", e);
    }

    // Delegate actual training to the path-based MultiDataSet fit implementation
    return fitPathsMultiDataSet(paths);
}
 
Example 9
/**
 * Evaluate the single-output network on a directory containing a set of DataSet objects to be loaded with a {@link DataSetLoader}.
 * Uses default batch size of {@link #DEFAULT_EVAL_SCORE_BATCH_SIZE}
 * @param path Path/URI to the directory containing the datasets to load
 * @return Evaluation
 */
public Evaluation evaluate(String path, DataSetLoader loader){
    JavaRDD<String> dataSetPaths;
    try {
        dataSetPaths = SparkUtils.listPaths(sc, path);
    } catch (IOException e){
        throw new RuntimeException("Error listing files for evaluation of files at path: " + path, e);
    }
    // Run the distributed evaluation with default worker count and batch size; no MultiDataSet loader
    Evaluation emptyEval = new Evaluation();
    Object[] evalResults = doEvaluation(dataSetPaths, DEFAULT_EVAL_WORKERS, DEFAULT_EVAL_SCORE_BATCH_SIZE, loader, (MultiDataSetLoader) null, emptyEval);
    return (Evaluation) evalResults[0];
}
 
Example 10
/**
 * Evaluate the single-output network on a directory containing a set of MultiDataSet objects to be loaded with a {@link MultiDataSetLoader}.
 * Uses default batch size of {@link #DEFAULT_EVAL_SCORE_BATCH_SIZE}
 * @param path Path/URI to the directory containing the datasets to load
 * @return Evaluation
 */
public Evaluation evaluate(String path, MultiDataSetLoader loader){
    JavaRDD<String> dataSetPaths;
    try {
        dataSetPaths = SparkUtils.listPaths(sc, path);
    } catch (IOException e){
        throw new RuntimeException("Error listing files for evaluation of files at path: " + path, e);
    }
    // Run the distributed evaluation with default worker count and batch size; no DataSet loader
    Evaluation emptyEval = new Evaluation();
    Object[] evalResults = doEvaluation(dataSetPaths, DEFAULT_EVAL_WORKERS, DEFAULT_EVAL_SCORE_BATCH_SIZE, null, loader, emptyEval);
    return (Evaluation) evalResults[0];
}