Java Code Examples for org.apache.spark.api.java.JavaRDD#id()

The following examples show how to use org.apache.spark.api.java.JavaRDD#id(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BaseTrainingMaster.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Makes sure the given training data has been exported, then returns an RDD over the
 * text files found under the export's {@code paths/} sub-directory.
 *
 * <p>Decision logic, keyed on {@code trainingData.id()}:
 * <ul>
 *   <li>nothing exported so far ({@code lastExportedRDDId == Integer.MIN_VALUE}): export;</li>
 *   <li>the last exported RDD has the same id: reuse that export (e.g. a further epoch);</li>
 *   <li>the last exported RDD has a different id: delete the previous export, then export.</li>
 * </ul>
 *
 * @param sc           Spark context used for the export-support check, temp-dir cleanup
 *                     and for reading the paths back
 * @param trainingData RDD of DataSet objects to train on
 * @return result of {@code sc.textFile(baseDir + "paths/")} for the export base directory
 */
protected JavaRDD<String> exportIfRequired(JavaSparkContext sc, JavaRDD<DataSet> trainingData) {
    ExportSupport.assertExportSupported(sc);
    if (collectTrainingStats) {
        stats.logExportStart();
    }

    // RDD ids are unique within a SparkContext, so the id identifies "the same RDD as last time"
    final int rddId = trainingData.id();
    final boolean nothingExportedYet = (lastExportedRDDId == Integer.MIN_VALUE);
    final boolean sameAsLastExport = !nothingExportedYet && lastExportedRDDId == rddId;

    final String exportRoot;
    if (sameAsLastExport) {
        // Already exported: just work out where the data lives
        exportRoot = getBaseDirForRDD(trainingData);
    } else {
        if (!nothingExportedYet) {
            // A different RDD was exported previously: clean up its data first
            deleteTempDir(sc, lastRDDExportPath);
        }
        exportRoot = export(trainingData);
    }

    if (collectTrainingStats) {
        stats.logExportEnd();
    }

    return sc.textFile(exportRoot + "paths/");
}
 
Example 2
Source File: BaseTrainingMaster.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * MultiDataSet counterpart of the export-if-required step: makes sure the given training
 * data has been exported, then returns an RDD over the text files found under the
 * export's {@code paths/} sub-directory.
 *
 * <p>Decision logic, keyed on {@code trainingData.id()}:
 * <ul>
 *   <li>nothing exported so far ({@code lastExportedRDDId == Integer.MIN_VALUE}): export;</li>
 *   <li>the last exported RDD has the same id: reuse that export (e.g. a further epoch);</li>
 *   <li>the last exported RDD has a different id: delete the previous export, then export.</li>
 * </ul>
 *
 * @param sc           Spark context used for the export-support check, temp-dir cleanup
 *                     and for reading the paths back
 * @param trainingData RDD of MultiDataSet objects to train on
 * @return result of {@code sc.textFile(baseDir + "paths/")} for the export base directory
 */
protected JavaRDD<String> exportIfRequiredMDS(JavaSparkContext sc, JavaRDD<MultiDataSet> trainingData) {
    ExportSupport.assertExportSupported(sc);
    if (collectTrainingStats) {
        stats.logExportStart();
    }

    // RDD ids are unique within a SparkContext, so the id identifies "the same RDD as last time"
    final int rddId = trainingData.id();
    final boolean nothingExportedYet = (lastExportedRDDId == Integer.MIN_VALUE);
    final boolean sameAsLastExport = !nothingExportedYet && lastExportedRDDId == rddId;

    final String exportRoot;
    if (sameAsLastExport) {
        // Already exported: just work out where the data lives
        exportRoot = getBaseDirForRDD(trainingData);
    } else {
        if (!nothingExportedYet) {
            // A different RDD was exported previously: clean up its data first
            deleteTempDir(sc, lastRDDExportPath);
        }
        exportRoot = exportMDS(trainingData);
    }

    if (collectTrainingStats) {
        stats.logExportEnd();
    }

    return sc.textFile(exportRoot + "paths/");
}
 
Example 3
Source File: BaseTrainingMaster.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Exports the given RDD of DataSet objects beneath its base directory and records it as
 * the most recent export.
 *
 * <p>Partitions are mapped through a {@code BatchAndExportDataSetsFunction} configured with
 * {@code batchSizePerWorker} and the {@code data/} sub-directory; the resulting strings are
 * saved as text under the {@code paths/} sub-directory.
 *
 * @param trainingData RDD to export
 * @return the base directory of the export, as given by {@code getBaseDirForRDD}
 */
protected String export(JavaRDD<DataSet> trainingData) {
    final String exportRoot = getBaseDirForRDD(trainingData);

    log.info("Initiating RDD<DataSet> export at {}", exportRoot);
    // The trailing 'true' is the second argument of mapPartitionsWithIndex (preservesPartitioning)
    trainingData
                    .mapPartitionsWithIndex(
                                    new BatchAndExportDataSetsFunction(batchSizePerWorker, exportRoot + "data/"),
                                    true)
                    .saveAsTextFile(exportRoot + "paths/");
    log.info("RDD<DataSet> export complete at {}", exportRoot);

    // Remember what was exported and where, so later calls can reuse or clean it up
    lastRDDExportPath = exportRoot;
    lastExportedRDDId = trainingData.id();
    return exportRoot;
}
 
Example 4
Source File: BaseTrainingMaster.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Exports the given RDD of MultiDataSet objects beneath its base directory and records it
 * as the most recent export.
 *
 * <p>Partitions are mapped through a {@code BatchAndExportMultiDataSetsFunction} configured
 * with {@code batchSizePerWorker} and the {@code data/} sub-directory; the resulting strings
 * are saved as text under the {@code paths/} sub-directory.
 *
 * @param trainingData RDD to export
 * @return the base directory of the export, as given by {@code getBaseDirForRDD}
 */
protected String exportMDS(JavaRDD<MultiDataSet> trainingData) {
    final String exportRoot = getBaseDirForRDD(trainingData);

    log.info("Initiating RDD<MultiDataSet> export at {}", exportRoot);
    // The trailing 'true' is the second argument of mapPartitionsWithIndex (preservesPartitioning)
    trainingData
                    .mapPartitionsWithIndex(
                                    new BatchAndExportMultiDataSetsFunction(batchSizePerWorker, exportRoot + "data/"),
                                    true)
                    .saveAsTextFile(exportRoot + "paths/");
    log.info("RDD<MultiDataSet> export complete at {}", exportRoot);

    // Remember what was exported and where, so later calls can reuse or clean it up
    lastRDDExportPath = exportRoot;
    lastExportedRDDId = trainingData.id();
    return exportRoot;
}
 
Example 5
Source File: BaseTrainingMaster.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the base export directory for an RDD, in the form
 * {@code <exportDirectory>/<trainingMasterUID>/<rdd id>/} (always ending with a slash).
 *
 * <p>If {@code exportDirectory} has not been set, it is first initialised from
 * {@code getDefaultExportDirectory(rdd.context())} and kept for subsequent calls.
 *
 * @param rdd RDD whose id (and, if needed, context) is used to build the path
 * @return the base directory path for this RDD
 */
protected String getBaseDirForRDD(JavaRDD<?> rdd) {
    if (exportDirectory == null) {
        exportDirectory = getDefaultExportDirectory(rdd.context());
    }

    // Avoid a doubled slash when the configured directory already ends with one
    final String separator = exportDirectory.endsWith("/") ? "" : "/";
    return exportDirectory + separator + trainingMasterUID + "/" + rdd.id() + "/";
}