Java Code Examples for org.deeplearning4j.spark.util.SparkUtils#listPaths()

The following examples show how to use org.deeplearning4j.spark.util.SparkUtils#listPaths() . You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example 1
Source File: PreprocessSpark.java    From Java-Deep-Learning-Cookbook with MIT License 6 votes vote down vote up
/**
 * Preprocesses the TinyImageNet dataset for distributed training: lists all image
 * files under {@code sourceDir} and writes them out as file batches via Spark.
 *
 * @param args command-line arguments, parsed by JCommander into this object's fields
 * @throws Exception if argument parsing or Spark preprocessing fails
 */
protected void entryPoint(String[] args) throws Exception {
    // Parse CLI args directly into the annotated fields of this instance
    new JCommander(this).parse(args);

    SparkConf conf = new SparkConf();
    conf.setMaster("local[*]");
    conf.setAppName("DL4JTinyImageNetSparkPreproc");
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        //Create training set: recursively list all allowed image files under <sourceDir>/train
        JavaRDD<String> filePathsTrain = SparkUtils.listPaths(sc, sourceDir + "/train", true, NativeImageLoader.ALLOWED_FORMATS);
        SparkDataUtils.createFileBatchesSpark(filePathsTrain, saveDir, batchSize, sc);

        //Create test set
        // NOTE(review): test batches are written to the same saveDir as the training
        // batches — confirm this is intended and outputs do not collide
        JavaRDD<String> filePathsTest = SparkUtils.listPaths(sc, sourceDir + "/test", true, NativeImageLoader.ALLOWED_FORMATS);
        SparkDataUtils.createFileBatchesSpark(filePathsTest, saveDir, batchSize, sc);

        System.out.println("----- Data Preprocessing Complete -----");
    } finally {
        sc.stop();  // release local Spark resources even if preprocessing fails
    }
}
 
Example 2
Source File: PreprocessSpark.java    From Java-Deep-Learning-Cookbook with MIT License 6 votes vote down vote up
/**
 * Preprocesses the TinyImageNet dataset for distributed training: lists all image
 * files under {@code sourceDir} and writes them out as file batches via Spark.
 *
 * @param args command-line arguments, parsed by JCommander into this object's fields
 * @throws Exception if argument parsing or Spark preprocessing fails
 */
protected void entryPoint(String[] args) throws Exception {
    // Parse CLI args directly into the annotated fields of this instance
    new JCommander(this).parse(args);

    SparkConf conf = new SparkConf();
    conf.setMaster("local[*]");
    conf.setAppName("DL4JTinyImageNetSparkPreproc");
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        //Create training set: recursively list all allowed image files under <sourceDir>/train
        JavaRDD<String> filePathsTrain = SparkUtils.listPaths(sc, sourceDir + "/train", true, NativeImageLoader.ALLOWED_FORMATS);
        SparkDataUtils.createFileBatchesSpark(filePathsTrain, saveDir, batchSize, sc);

        //Create test set
        // NOTE(review): test batches are written to the same saveDir as the training
        // batches — confirm this is intended and outputs do not collide
        JavaRDD<String> filePathsTest = SparkUtils.listPaths(sc, sourceDir + "/test", true, NativeImageLoader.ALLOWED_FORMATS);
        SparkDataUtils.createFileBatchesSpark(filePathsTest, saveDir, batchSize, sc);

        System.out.println("----- Data Preprocessing Complete -----");
    } finally {
        sc.stop();  // release local Spark resources even if preprocessing fails
    }
}
 
Example 3
Source File: SparkDataValidation.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Validates all serialized DataSet files under a directory, optionally deleting
 * invalid files, and aggregates the per-file results into a single summary.
 *
 * @param path          directory (path/URI) to scan for DataSet files
 * @param recursive     whether to descend into subdirectories
 * @param deleteInvalid whether invalid files should be deleted during validation
 * @param featuresShape expected features array shape (null-handling per ValidateDataSetFn)
 * @param labelsShape   expected labels array shape
 * @return aggregated validation result across all files
 */
protected static ValidationResult validateDataSets(JavaSparkContext sc, String path, boolean recursive, boolean deleteInvalid,
                                            int[] featuresShape, int[] labelsShape) {
    // Enumerate candidate files first; wrap listing failures as unchecked
    final JavaRDD<String> filePaths;
    try {
        filePaths = SparkUtils.listPaths(sc, path, recursive);
    } catch (IOException e) {
        throw new RuntimeException("Error listing paths in directory", e);
    }

    // Validate each file in parallel, then combine the per-file results
    return filePaths
            .map(new ValidateDataSetFn(deleteInvalid, featuresShape, labelsShape))
            .reduce(new ValidationResultReduceFn());
}
 
Example 4
Source File: SparkDataValidation.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Validates all serialized MultiDataSet files under a directory, optionally deleting
 * invalid files, and aggregates the per-file results into a single summary.
 *
 * @param path             directory (path/URI) to scan for MultiDataSet files
 * @param recursive        whether to descend into subdirectories
 * @param deleteInvalid    whether invalid files should be deleted during validation
 * @param numFeatureArrays expected number of feature arrays per MultiDataSet
 * @param numLabelArrays   expected number of label arrays per MultiDataSet
 * @param featuresShape    expected shapes for each feature array
 * @param labelsShape      expected shapes for each label array
 * @return aggregated validation result across all files
 */
protected static ValidationResult validateMultiDataSets(JavaSparkContext sc, String path, boolean recursive, boolean deleteInvalid,
                                                 int numFeatureArrays, int numLabelArrays,
                                                 List<int[]> featuresShape, List<int[]> labelsShape) {
    // Enumerate candidate files first; wrap listing failures as unchecked
    final JavaRDD<String> filePaths;
    try {
        filePaths = SparkUtils.listPaths(sc, path, recursive);
    } catch (IOException e) {
        throw new RuntimeException("Error listing paths in directory", e);
    }

    // Validate each file in parallel, then combine the per-file results
    ValidateMultiDataSetFn validator =
            new ValidateMultiDataSetFn(deleteInvalid, numFeatureArrays, numLabelArrays, featuresShape, labelsShape);
    return filePaths.map(validator).reduce(new ValidationResultReduceFn());
}
 
Example 5
Source File: SparkDl4jMultiLayer.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Fit the SparkDl4jMultiLayer network using a directory of serialized DataSet objects.
 * The assumption here is that the directory contains a number of {@link DataSet} objects, each serialized using
 * {@link DataSet#save(OutputStream)}
 *
 * @param path Path to the directory containing the serialized DataSet objects
 * @return The MultiLayerNetwork after training
 * @throws RuntimeException if listing the directory fails (wraps the underlying IOException)
 */
public MultiLayerNetwork fit(String path) {
    // Flush any queued ND4J operations before distributing work to executors
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

    JavaRDD<String> paths;
    try {
        paths = SparkUtils.listPaths(sc, path);
    } catch (IOException e) {
        throw new RuntimeException("Error listing paths in directory", e);
    }

    return fitPaths(paths);
}
 
Example 6
Source File: SparkDl4jMultiLayer.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Evaluate on a directory containing a set of DataSet objects to be loaded with a {@link DataSetLoader}.
 *
 * @param path      Path/URI to the directory containing the datasets to load
 * @param batchSize Batch size to use during evaluation
 * @param loader    Loader used to deserialize each DataSet file
 * @return Evaluation
 * @throws RuntimeException if listing the directory fails (wraps the underlying IOException)
 */
public <T extends Evaluation> T evaluate(String path, int batchSize, DataSetLoader loader){
    JavaRDD<String> paths;
    try {
        paths = SparkUtils.listPaths(sc, path);
    } catch (IOException e) {
        throw new RuntimeException("Error listing paths in directory", e);
    }

    // Load each file into a DataSet via the provided loader, resolving files through Hadoop config
    JavaRDD<DataSet> rdd = paths.map(new LoadDataSetFunction(loader, new RemoteFileSourceFactory(BroadcastHadoopConfigHolder.get(sc))));
    // Cast is safe in practice: doEvaluation returns the Evaluation instance passed in
    @SuppressWarnings("unchecked")
    T result = (T) doEvaluation(rdd, batchSize, new org.deeplearning4j.eval.Evaluation())[0];
    return result;
}
 
Example 7
Source File: SparkComputationGraph.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Fit the SparkComputationGraph network using a directory of serialized DataSet objects.
 * The assumption here is that the directory contains a number of {@link DataSet} objects, each serialized using
 * {@link DataSet#save(OutputStream)}
 *
 * @param path Path to the directory containing the serialized DataSet objects
 * @return The ComputationGraph after training
 * @throws RuntimeException if listing the directory fails (wraps the underlying IOException)
 */
public ComputationGraph fit(String path) {
    // Flush any queued ND4J operations before distributing work to executors
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

    JavaRDD<String> paths;
    try {
        paths = SparkUtils.listPaths(sc, path);
    } catch (IOException e) {
        throw new RuntimeException("Error listing paths in directory", e);
    }

    return fitPaths(paths);
}
 
Example 8
Source File: SparkComputationGraph.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Fit the SparkComputationGraph network using a directory of serialized MultiDataSet objects.
 * The assumption here is that the directory contains a number of serialized {@link MultiDataSet} objects
 *
 * @param path Path to the directory containing the serialized MultiDataSet objects
 * @return The ComputationGraph after training
 * @throws RuntimeException if listing the directory fails (wraps the underlying IOException)
 */
public ComputationGraph fitMultiDataSet(String path) {
    // Flush any queued ND4J operations before distributing work to executors
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

    JavaRDD<String> paths;
    try {
        paths = SparkUtils.listPaths(sc, path);
    } catch (IOException e) {
        throw new RuntimeException("Error listing paths in directory", e);
    }

    return fitPathsMultiDataSet(paths);
}
 
Example 9
Source File: SparkComputationGraph.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Evaluate the single-output network on a directory containing a set of DataSet objects to be loaded
 * with a {@link DataSetLoader}. Uses default batch size of {@link #DEFAULT_EVAL_SCORE_BATCH_SIZE}.
 *
 * @param path   Path/URI to the directory containing the datasets to load
 * @param loader Loader used to deserialize each DataSet file
 * @return Evaluation
 */
public Evaluation evaluate(String path, DataSetLoader loader){
    final JavaRDD<String> data;
    try {
        data = SparkUtils.listPaths(sc, path);
    } catch (IOException e){
        throw new RuntimeException("Error listing files for evaluation of files at path: " + path, e);
    }
    Evaluation[] evals = (Evaluation[]) new Evaluation[]{
            (Evaluation) doEvaluation(data, DEFAULT_EVAL_WORKERS, DEFAULT_EVAL_SCORE_BATCH_SIZE, loader, (MultiDataSetLoader)null, new Evaluation())[0]};
    return evals[0];
}
 
Example 10
Source File: SparkComputationGraph.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Evaluate the single-output network on a directory containing a set of MultiDataSet objects to be loaded
 * with a {@link MultiDataSetLoader}. Uses default batch size of {@link #DEFAULT_EVAL_SCORE_BATCH_SIZE}.
 *
 * @param path   Path/URI to the directory containing the datasets to load
 * @param loader Loader used to deserialize each MultiDataSet file
 * @return Evaluation
 */
public Evaluation evaluate(String path, MultiDataSetLoader loader){
    final JavaRDD<String> data;
    try {
        data = SparkUtils.listPaths(sc, path);
    } catch (IOException e){
        throw new RuntimeException("Error listing files for evaluation of files at path: " + path, e);
    }
    Object first = doEvaluation(data, DEFAULT_EVAL_WORKERS, DEFAULT_EVAL_SCORE_BATCH_SIZE, null, loader, new Evaluation())[0];
    return (Evaluation) first;
}