Java Code Examples for org.deeplearning4j.nn.multilayer.MultiLayerNetwork#setUpdater()

The following examples show how to use org.deeplearning4j.nn.multilayer.MultiLayerNetwork#setUpdater(). Each example is taken from the deeplearning4j project; the source file and license are noted above each listing.
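Before the project examples, here is a minimal sketch of the basic call pattern they all build on: configure and initialize a MultiLayerNetwork, create an Updater for it with UpdaterCreator, and attach it via setUpdater(). The layer sizes and hyperparameters below are placeholders, not taken from any particular project.

void basicSetUpdaterSketch() {
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .updater(new Nesterovs(0.1, 0.9))   //Network-wide updater; placeholder hyperparameters
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(4).nOut(5).build())
                    .layer(1, new OutputLayer.Builder().nIn(5).nOut(3)
                                    .activation(Activation.SOFTMAX).build())
                    .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    //Create an updater for this network and attach it explicitly;
    //getUpdater() then returns the very same object (see Example 1)
    Updater updater = UpdaterCreator.getUpdater(net);
    net.setUpdater(updater);
}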
Example 1
Source File: TestUpdaters.java    From deeplearning4j with Apache License 2.0
@Test
public void testSetGetUpdater2() {
    //Same as above test, except that we are doing setUpdater on a new network
    Nd4j.getRandom().setSeed(12345L);
    double lr = 0.03;
    int nIn = 4;
    int nOut = 8;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Nesterovs(lr,0.6)).list()
                    .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(5)
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                    .layer(1, new DenseLayer.Builder().nIn(5).nOut(6)
                                    .updater(new NoOp()).build())
                    .layer(2, new DenseLayer.Builder().nIn(6).nOut(7)
                                    .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build())
                    .layer(3, new OutputLayer.Builder().nIn(7).nOut(nOut).activation(Activation.SOFTMAX)
                                    .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
                    .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    Updater newUpdater = UpdaterCreator.getUpdater(net);
    net.setUpdater(newUpdater);
    assertTrue(newUpdater == net.getUpdater()); //Should be identical object
}
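Note that the per-layer .updater(...) settings override the network-level Nesterovs updater, and that setUpdater() stores the passed object itself rather than a copy, which is why the reference-equality check at the end passes.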
 
Example 2
Source File: NetworkUtils.java    From deeplearning4j with Apache License 2.0
private static void refreshUpdater(MultiLayerNetwork net) {
    INDArray origUpdaterState = net.getUpdater().getStateViewArray();
    MultiLayerUpdater origUpdater = (MultiLayerUpdater) net.getUpdater();
    net.setUpdater(null);
    MultiLayerUpdater newUpdater = (MultiLayerUpdater) net.getUpdater();
    INDArray newUpdaterState = rebuildUpdaterStateArray(origUpdaterState, origUpdater.getUpdaterBlocks(), newUpdater.getUpdaterBlocks());
    newUpdater.setStateViewArray(newUpdaterState);
}
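The helper above relies on lazy re-creation: after setUpdater(null), the next getUpdater() call builds a fresh MultiLayerUpdater, whose state is then overwritten with the remapped view array. When the old state can simply be discarded rather than remapped, the same pattern reduces to the sketch below (assuming an already-initialized net).

private static Updater discardUpdaterState(MultiLayerNetwork net) {
    //Drop the current updater together with its accumulated state (momentum, moving averages, ...)
    net.setUpdater(null);
    //The next call lazily creates a fresh updater with newly allocated state;
    //fit() would trigger the same re-creation implicitly (see Examples 4-6)
    return net.getUpdater();
}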
 
Example 3
Source File: ParameterAveragingTrainingWorker.java    From deeplearning4j with Apache License 2.0
@Override
public MultiLayerNetwork getInitialModel() {
    if (configuration.isCollectTrainingStats())
        stats = new ParameterAveragingTrainingWorkerStats.ParameterAveragingTrainingWorkerStatsHelper();

    if (configuration.isCollectTrainingStats())
        stats.logBroadcastGetValueStart();
    NetBroadcastTuple tuple = broadcast.getValue();
    if (configuration.isCollectTrainingStats())
        stats.logBroadcastGetValueEnd();

    //Don't want to have shared configuration object: each may update its iteration count (for LR schedule etc) individually
    MultiLayerNetwork net = new MultiLayerNetwork(tuple.getConfiguration().clone());
    //Can't have shared parameter array across executors for parameter averaging; the parameters are already copied via unsafeDuplication(), so cloneParametersArray can be false
    net.init(tuple.getParameters().unsafeDuplication(), false);

    if (tuple.getUpdaterState() != null) {
        net.setUpdater(new MultiLayerUpdater(net, tuple.getUpdaterState().unsafeDuplication())); //Can't have shared updater state
    }

    Nd4j.getExecutioner().commit();

    configureListeners(net, tuple.getCounter().getAndIncrement());

    if (configuration.isCollectTrainingStats())
        stats.logInitEnd();

    return net;
}
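The same three copies (configuration, parameters, updater state) are needed whenever a network has to be duplicated without sharing any arrays with the original. A minimal sketch of that pattern, assuming the standard MultiLayerNetwork accessors and using dup() where the worker above uses unsafeDuplication():

private static MultiLayerNetwork copyWithUpdaterState(MultiLayerNetwork net) {
    //Fresh configuration object, so iteration counts etc. are not shared
    MultiLayerNetwork copy = new MultiLayerNetwork(net.getLayerWiseConfigurations().clone());
    //Parameters already copied by dup(), so cloneParametersArray can be false
    copy.init(net.params().dup(), false);

    //Copy the updater state as well, if there is any
    INDArray updaterState = net.getUpdater().getStateViewArray();
    if (updaterState != null) {
        copy.setUpdater(new MultiLayerUpdater(copy, updaterState.dup()));
    }
    return copy;
}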
 
Example 4
Source File: TestCompareParameterAveragingSparkVsSingleMachine.java    From deeplearning4j with Apache License 2.0
@Test
public void testOneExecutor() {
    //Idea: single worker/executor on Spark should give identical results to a single machine

    int miniBatchSize = 10;
    int nWorkers = 1;

    for (boolean saveUpdater : new boolean[] {true, false}) {
        JavaSparkContext sc = getContext(nWorkers);

        try {
            //Do training locally, for 3 minibatches
            int[] seeds = {1, 2, 3};

            MultiLayerNetwork net = new MultiLayerNetwork(getConf(12345, new RmsProp(0.5)));
            net.init();
            INDArray initialParams = net.params().dup();

            for (int i = 0; i < seeds.length; i++) {
                DataSet ds = getOneDataSet(miniBatchSize, seeds[i]);
                if (!saveUpdater)
                    net.setUpdater(null);
                net.fit(ds);
            }
            INDArray finalParams = net.params().dup();

            //Do training on Spark with one executor, for 3 separate minibatches
            TrainingMaster tm = getTrainingMaster(1, miniBatchSize, saveUpdater);
            SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConf(12345, new RmsProp(0.5)), tm);
            sparkNet.setCollectTrainingStats(true);
            INDArray initialSparkParams = sparkNet.getNetwork().params().dup();

            for (int i = 0; i < seeds.length; i++) {
                List<DataSet> list = getOneDataSetAsIndividalExamples(miniBatchSize, seeds[i]);
                JavaRDD<DataSet> rdd = sc.parallelize(list);

                sparkNet.fit(rdd);
            }

            INDArray finalSparkParams = sparkNet.getNetwork().params().dup();

            assertEquals(initialParams, initialSparkParams);
            assertNotEquals(initialParams, finalParams);
            assertEquals(finalParams, finalSparkParams);
        } finally {
            sc.stop();
        }
    }
}
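For the !saveUpdater case, calling net.setUpdater(null) before each fit() is the single-machine counterpart of saveUpdater == false on the Spark side: the accumulated RMSProp state is discarded between minibatches, so both code paths start each fit from a fresh updater and arrive at the same final parameters.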
 
Example 5
Source File: TestCompareParameterAveragingSparkVsSingleMachine.java    From deeplearning4j with Apache License 2.0
@Test
    public void testAverageEveryStep() {
        //Idea: averaging every step with SGD (SGD updater + optimizer) is mathematically identical to doing the learning
        // on a single machine for synchronous distributed training
        //BUT: This is *ONLY* the case if all workers get an identical number of examples. This won't be the case if
        // we use RDD.randomSplit (which is what occurs if we use .fit(JavaRDD<DataSet>) on a data set that needs splitting),
        // which might give a number of examples that isn't divisible by number of workers (like 39 examples on 4 executors)
        //This is also ONLY the case using SGD updater

        int miniBatchSizePerWorker = 10;
        int nWorkers = 4;


        for (boolean saveUpdater : new boolean[] {true, false}) {
            JavaSparkContext sc = getContext(nWorkers);

            try {
                //Do training locally, for 3 minibatches
                int[] seeds = {1, 2, 3};

                //                CudaGridExecutioner executioner = (CudaGridExecutioner) Nd4j.getExecutioner();

                MultiLayerNetwork net = new MultiLayerNetwork(getConf(12345, new Sgd(0.5)));
                net.init();
                INDArray initialParams = net.params().dup();
                //              executioner.addToWatchdog(initialParams, "initialParams");


                for (int i = 0; i < seeds.length; i++) {
                    DataSet ds = getOneDataSet(miniBatchSizePerWorker * nWorkers, seeds[i]);
                    if (!saveUpdater)
                        net.setUpdater(null);
                    net.fit(ds);
                }
                INDArray finalParams = net.params().dup();

                //Do training on Spark with one executor, for 3 separate minibatches
                //                TrainingMaster tm = getTrainingMaster(1, miniBatchSizePerWorker, saveUpdater);
                ParameterAveragingTrainingMaster tm = new ParameterAveragingTrainingMaster.Builder(1)
                                .averagingFrequency(1).batchSizePerWorker(miniBatchSizePerWorker)
                                .saveUpdater(saveUpdater).workerPrefetchNumBatches(0)
                                //                        .rddTrainingApproach(RDDTrainingApproach.Direct)
                                .rddTrainingApproach(RDDTrainingApproach.Export).build();
                SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConf(12345, new Sgd(0.5)), tm);
                sparkNet.setCollectTrainingStats(true);
                INDArray initialSparkParams = sparkNet.getNetwork().params().dup();

                //            executioner.addToWatchdog(initialSparkParams, "initialSparkParams");

                for (int i = 0; i < seeds.length; i++) {
                    List<DataSet> list = getOneDataSetAsIndividalExamples(miniBatchSizePerWorker * nWorkers, seeds[i]);
                    JavaRDD<DataSet> rdd = sc.parallelize(list);

                    sparkNet.fit(rdd);
                }

//                System.out.println(sparkNet.getSparkTrainingStats().statsAsString());
                sparkNet.getSparkTrainingStats().statsAsString();

                INDArray finalSparkParams = sparkNet.getNetwork().params().dup();

//                System.out.println("Initial (Local) params:       " + Arrays.toString(initialParams.data().asFloat()));
//                System.out.println("Initial (Spark) params:       "
//                                + Arrays.toString(initialSparkParams.data().asFloat()));
//                System.out.println("Final (Local) params: " + Arrays.toString(finalParams.data().asFloat()));
//                System.out.println("Final (Spark) params: " + Arrays.toString(finalSparkParams.data().asFloat()));
                assertEquals(initialParams, initialSparkParams);
                assertNotEquals(initialParams, finalParams);
                assertEquals(finalParams, finalSparkParams);

                double sparkScore = sparkNet.getScore();
                assertTrue(sparkScore > 0.0);

                assertEquals(net.score(), sparkScore, 1e-3);
            } finally {
                sc.stop();
            }
        }
    }
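The restriction to SGD in the comment can be made concrete: a plain SGD step is linear in the gradient, so averaging the workers' parameters w - eta*g_i after each step yields w - eta*(mean of g_i), exactly the update a single machine would compute on the combined minibatch (given equal-sized worker batches). Stateful updaters such as RMSProp or Adam keep per-parameter history that does not average the same way, which is why Example 4 compares an RmsProp run against a single Spark worker only.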
 
Example 6
Source File: TestCompareParameterAveragingSparkVsSingleMachine.java    From deeplearning4j with Apache License 2.0
@Test
    public void testAverageEveryStepCNN() {
        //Idea: averaging every step with SGD (SGD updater + optimizer) is mathematically identical to doing the learning
        // on a single machine for synchronous distributed training
        //BUT: This is *ONLY* the case if all workers get an identical number of examples. This won't be the case if
        // we use RDD.randomSplit (which is what occurs if we use .fit(JavaRDD<DataSet>) on a data set that needs splitting),
        // which might give a number of examples that isn't divisible by number of workers (like 39 examples on 4 executors)
        //This is also ONLY the case using SGD updater

        int miniBatchSizePerWorker = 10;
        int nWorkers = 4;


        for (boolean saveUpdater : new boolean[] {true, false}) {
            JavaSparkContext sc = getContext(nWorkers);

            try {
                //Do training locally, for 3 minibatches
                int[] seeds = {1, 2, 3};

                MultiLayerNetwork net = new MultiLayerNetwork(getConfCNN(12345, new Sgd(0.5)));
                net.init();
                INDArray initialParams = net.params().dup();

                for (int i = 0; i < seeds.length; i++) {
                    DataSet ds = getOneDataSetCNN(miniBatchSizePerWorker * nWorkers, seeds[i]);
                    if (!saveUpdater)
                        net.setUpdater(null);
                    net.fit(ds);
                }
                INDArray finalParams = net.params().dup();

                //Do training on Spark with one executor, for 3 separate minibatches
                ParameterAveragingTrainingMaster tm = new ParameterAveragingTrainingMaster.Builder(1)
                                .averagingFrequency(1).batchSizePerWorker(miniBatchSizePerWorker)
                                .saveUpdater(saveUpdater).workerPrefetchNumBatches(0)
                                .rddTrainingApproach(RDDTrainingApproach.Export).build();
                SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConfCNN(12345, new Sgd(0.5)), tm);
                sparkNet.setCollectTrainingStats(true);
                INDArray initialSparkParams = sparkNet.getNetwork().params().dup();

                for (int i = 0; i < seeds.length; i++) {
                    List<DataSet> list =
                                    getOneDataSetAsIndividalExamplesCNN(miniBatchSizePerWorker * nWorkers, seeds[i]);
                    JavaRDD<DataSet> rdd = sc.parallelize(list);

                    sparkNet.fit(rdd);
                }

//                System.out.println(sparkNet.getSparkTrainingStats().statsAsString());
                sparkNet.getSparkTrainingStats().statsAsString();

                INDArray finalSparkParams = sparkNet.getNetwork().params().dup();

//                System.out.println("Initial (Local) params:       " + Arrays.toString(initialParams.data().asFloat()));
//                System.out.println("Initial (Spark) params:       "
//                                + Arrays.toString(initialSparkParams.data().asFloat()));
//                System.out.println("Final (Local) params: " + Arrays.toString(finalParams.data().asFloat()));
//                System.out.println("Final (Spark) params: " + Arrays.toString(finalSparkParams.data().asFloat()));
                assertArrayEquals(initialParams.data().asFloat(), initialSparkParams.data().asFloat(), 1e-8f);
                assertArrayEquals(finalParams.data().asFloat(), finalSparkParams.data().asFloat(), 1e-6f);

                double sparkScore = sparkNet.getScore();
                assertTrue(sparkScore > 0.0);

                assertEquals(net.score(), sparkScore, 1e-3);
            } finally {
                sc.stop();
            }
        }
    }