Java Code Examples for org.deeplearning4j.nn.conf.MultiLayerConfiguration#Builder

The following examples show how to use org.deeplearning4j.nn.conf.MultiLayerConfiguration#Builder. The examples are drawn from open source projects; each one notes its source file, originating project, and license.
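
Before diving in, here is a minimal, self-contained sketch of the pattern the examples share: global settings go on NeuralNetConfiguration.Builder, list() switches to a MultiLayerConfiguration.Builder for per-layer configuration, and build() produces the final MultiLayerConfiguration. The layer sizes and the class name BuilderBasics are illustrative placeholders, not taken from any of the projects below.

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class BuilderBasics {
    public static void main(String[] args) {
        // Global settings (seed, updater, regularization, ...) are set before list();
        // per-layer settings go on the individual layer builders.
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(123)
                .list()
                .layer(0, new DenseLayer.Builder().nIn(784).nOut(100)
                        .activation(Activation.RELU).build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                        .nIn(100).nOut(10).activation(Activation.SOFTMAX).build())
                .build();

        // The configuration is only a description; init() allocates the parameters.
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        System.out.println(net.summary());
    }
}
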
Example 1
Source File: ConvolutionLayerSetupTest.java    From deeplearning4j with Apache License 2.0
public MultiLayerConfiguration.Builder complete() {
    final int numRows = 28;
    final int numColumns = 28;
    int nChannels = 1;
    int outputNum = 10;
    int seed = 123;

    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed)
                    .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).list()
                    .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder(new int[] {10, 10},
                                    new int[] {2, 2}).nIn(nChannels).nOut(6).build())
                    .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2})
                                    .build())
                    .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                                    .nIn(5 * 5 * 1 * 6) //150: 28x28 -> 10x10 conv, stride 2 -> 10x10 -> 2x2 max pool -> 5x5, times 6 feature maps
                                    .nOut(outputNum).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX)
                                    .build())
                    .inputPreProcessor(0, new FeedForwardToCnnPreProcessor(numRows, numColumns, nChannels))
                    .inputPreProcessor(2, new CnnToFeedForwardPreProcessor(5, 5, 6));

    return builder;
}
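
The method above returns the builder itself; as the later examples show, a caller still has to build it and initialize a network. A minimal usage sketch, assuming complete() is in scope:

MultiLayerNetwork model = new MultiLayerNetwork(complete().build());
model.init();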
 
Example 2
Source File: ConvolutionLayerSetupTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testDeconvolution() {

    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().list()
            //out = stride * (in-1) + filter - 2*pad -> 2 * (28-1) + 2 - 0 = 56 -> 56x56x3
            .layer(0, new Deconvolution2D.Builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build())
            //(56-2+2*1)/2+1 = 29 -> 29x29x3
            .layer(1, new SubsamplingLayer.Builder().kernelSize(2, 2).padding(1, 1).stride(2, 2).build())
            .layer(2, new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).build())
            .setInputType(InputType.convolutional(28, 28, 1));

    MultiLayerConfiguration conf = builder.build();

    assertNotNull(conf.getInputPreProcess(2));
    assertTrue(conf.getInputPreProcess(2) instanceof CnnToFeedForwardPreProcessor);
    CnnToFeedForwardPreProcessor proc = (CnnToFeedForwardPreProcessor) conf.getInputPreProcess(2);
    assertEquals(29, proc.getInputHeight());
    assertEquals(29, proc.getInputWidth());
    assertEquals(3, proc.getNumChannels());

    assertEquals(29 * 29 * 3, ((FeedForwardLayer) conf.getConf(2).getLayer()).getNIn());
}
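
The shape comments in this test follow the usual formulas: deconvolution gives out = stride * (in - 1) + kernel - 2 * pad, while convolution and subsampling give out = (in - kernel + 2 * pad) / stride + 1. A small illustrative check of the numbers used above (these helpers are ours, not part of the DL4J API):

// Illustrative only: verify the layer-size comments in the test above.
static int deconvOut(int in, int kernel, int stride, int pad) {
    return stride * (in - 1) + kernel - 2 * pad;
}

static int convOut(int in, int kernel, int stride, int pad) {
    return (in - kernel + 2 * pad) / stride + 1;
}

// deconvOut(28, 2, 2, 0) == 56 -> 56x56x3 after the Deconvolution2D layer
// convOut(56, 2, 2, 1)   == 29 -> 29x29x3 after subsampling, hence nIn = 29 * 29 * 3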
 
Example 3
Source File: ConvolutionLayerSetupTest.java    From deeplearning4j with Apache License 2.0
public MultiLayerConfiguration.Builder incompleteLFW() {
    MultiLayerConfiguration.Builder builder =
                    new NeuralNetConfiguration.Builder().seed(3)
                                    .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list()
                                    .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder(
                                                    new int[] {5, 5}).nOut(6).build())
                                    .layer(1, new org.deeplearning4j.nn.conf.layers.SubsamplingLayer.Builder(
                                                    new int[] {2, 2}).build())
                                    .layer(2, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder(
                                                    new int[] {5, 5}).nOut(6).build())
                                    .layer(3, new org.deeplearning4j.nn.conf.layers.SubsamplingLayer.Builder(
                                                    new int[] {2, 2}).build())
                                    .layer(4, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
                                                    LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).activation(Activation.SOFTMAX)
                                            .nOut(2).build());
    return builder;
}
 
Example 4
Source File: ConvolutionLayerSetupTest.java    From deeplearning4j with Apache License 2.0
public MultiLayerConfiguration.Builder incompleteLRN() {
    MultiLayerConfiguration.Builder builder =
                    new NeuralNetConfiguration.Builder().seed(3)
                                    .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list()
                                    .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder(
                                                    new int[] {5, 5}).nOut(6).build())
                                    .layer(1, new org.deeplearning4j.nn.conf.layers.SubsamplingLayer.Builder(
                                                    new int[] {2, 2}).build())
                                    .layer(2, new LocalResponseNormalization.Builder().build())
                                    .layer(3, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder(
                                                    new int[] {5, 5}).nOut(6).build())
                                    .layer(4, new org.deeplearning4j.nn.conf.layers.SubsamplingLayer.Builder(
                                                    new int[] {2, 2}).build())
                                    .layer(5, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
                                            LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(2)
                                            .activation(Activation.SOFTMAX).build());
    return builder;
}
 
Example 5
Source File: ConvolutionLayerTest.java    From deeplearning4j with Apache License 2.0
private static MultiLayerNetwork getCNNMLNConfig(boolean backprop, boolean pretrain) {
    int outputNum = 10;
    int seed = 123;

    MultiLayerConfiguration.Builder conf =
                    new NeuralNetConfiguration.Builder().seed(seed)
                                    .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).list()
                                    .layer(0, new ConvolutionLayer.Builder(new int[] {10, 10}).nOut(6).build())
                                    .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX,
                                                    new int[] {2, 2}).stride(1, 1).build())
                                    .layer(2, new OutputLayer.Builder(
                                                    LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                                                                    .nOut(outputNum).weightInit(WeightInit.XAVIER)
                                                                    .activation(Activation.SOFTMAX).build())
                                    .setInputType(InputType.convolutionalFlat(28, 28, 1));

    MultiLayerNetwork model = new MultiLayerNetwork(conf.build());
    model.init();

    return model;
}
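
Note the InputType.convolutionalFlat(28, 28, 1) here, versus InputType.convolutional(...) in other examples: convolutionalFlat is for data delivered as flattened row vectors (as the MNIST iterators produce) and lets DL4J insert the appropriate reshaping preprocessor, while convolutional expects input already shaped as [minibatch, channels, height, width]. In both cases setInputType also infers each layer's nIn, which is why most layers in these examples specify only nOut.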
 
Example 6
Source File: ConvolutionLayerTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testTwdFirstLayer() throws Exception {
    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(123)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).l2(2e-4)
                    .updater(new Nesterovs(0.9)).dropOut(0.5)
                    .list().layer(0,
                                    new ConvolutionLayer.Builder(8, 8) //16 filters kernel size 8 stride 4
                                                    .stride(4, 4).nOut(16).dropOut(0.5)
                                                    .activation(Activation.RELU).weightInit(
                                                                    WeightInit.XAVIER)
                                                    .build())
                    .layer(1, new ConvolutionLayer.Builder(4, 4) //32 filters kernel size 4 stride 2
                                    .stride(2, 2).nOut(32).dropOut(0.5).activation(Activation.RELU)
                                    .weightInit(WeightInit.XAVIER).build())
                    .layer(2, new DenseLayer.Builder() //fully connected with 256 rectified units
                                    .nOut(256).activation(Activation.RELU).weightInit(WeightInit.XAVIER)
                                    .dropOut(0.5).build())
                    .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS) //output layer
                                    .nOut(10).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build())
                    .setInputType(InputType.convolutionalFlat(28, 28, 1));

    DataSetIterator iter = new MnistDataSetIterator(10, 10);
    MultiLayerConfiguration conf = builder.build();
    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();
    DataSet ds = iter.next();
    for( int i=0; i<5; i++ ) {
        network.fit(ds);
    }
}
 
Example 7
Source File: ManualTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testCNNActivationsFrozen() throws Exception {

    int nChannels = 1;
    int outputNum = 10;
    int batchSize = 64;
    int nEpochs = 10;
    int seed = 123;

    log.info("Load data....");
    DataSetIterator mnistTrain = new MnistDataSetIterator(batchSize, true, 12345);

    log.info("Build model....");
    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed)
            .l2(0.0005)
            .weightInit(WeightInit.XAVIER)
            .updater(new Nesterovs(0.01, 0.9)).list()
            .layer(0, new FrozenLayer(new ConvolutionLayer.Builder(5, 5)
                    //nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied
                    .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()))
            .layer(1, new FrozenLayer(new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
                    .stride(2, 2).build()))
            .layer(2, new FrozenLayer(new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()))
            .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                    .nOut(outputNum).activation(Activation.SOFTMAX).build())
            .setInputType(InputType.convolutionalFlat(28, 28, nChannels));

    MultiLayerConfiguration conf = builder.build();
    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();

    log.info("Train model....");
    model.setListeners(new ConvolutionalIterationListener(1));

    for (int i = 0; i < nEpochs; i++) {
        model.fit(mnistTrain);
    }
}
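
FrozenLayer is a wrapper: the wrapped layer behaves normally in the forward pass, but its parameters are excluded from updates during fit(), so in this configuration only the output layer actually learns. This is the usual building block for transfer-learning setups where early feature extractors are kept fixed.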
 
Example 8
Source File: ConvolutionLayerSetupTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testDenseToOutputLayer() {
    Nd4j.getRandom().setSeed(12345);
    final int numRows = 76;
    final int numColumns = 76;
    int nChannels = 3;
    int outputNum = 6;
    int seed = 123;

    //setup the network
    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed)
                    .l1(1e-1).l2(2e-4).dropOut(0.5).miniBatch(true)
                    .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list()
                    .layer(0, new ConvolutionLayer.Builder(5, 5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER)
                                    .activation(Activation.RELU).build())
                    .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2})
                                    .build())
                    .layer(2, new ConvolutionLayer.Builder(3, 3).nOut(10).dropOut(0.5).weightInit(WeightInit.XAVIER)
                                    .activation(Activation.RELU).build())
                    .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2})
                                    .build())
                    .layer(4, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build())
                    .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                                    .nOut(outputNum).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX)
                                    .build())

                    .setInputType(InputType.convolutional(numRows, numColumns, nChannels));

    DataSet d = new DataSet(Nd4j.rand(new int[]{10, nChannels, numRows, numColumns}),
                    FeatureUtil.toOutcomeMatrix(new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 6));
    MultiLayerNetwork network = new MultiLayerNetwork(builder.build());
    network.init();
    network.fit(d);

}
 
Example 9
Source File: TestListeners.java    From deeplearning4j with Apache License 2.0
@Test
public void testListeners() {
    TestListener.clearCounts();

    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().list().layer(0,
                    new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(10).nOut(10)
                                    .activation(Activation.TANH).build());

    MultiLayerConfiguration conf = builder.build();
    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();

    testListenersForModel(model, Collections.singletonList(new TestListener()));
}
 
Example 10
Source File: LRNGradientCheckTests.java    From deeplearning4j with Apache License 2.0
    @Test
    public void testGradientLRNSimple() {
        Nd4j.getRandom().setSeed(12345);
        int minibatch = 10;
        int depth = 6;
        int hw = 5;
        int nOut = 4;
        INDArray input = Nd4j.rand(new int[] {minibatch, depth, hw, hw});
        INDArray labels = Nd4j.zeros(minibatch, nOut);
        Random r = new Random(12345);
        for (int i = 0; i < minibatch; i++) {
            labels.putScalar(i, r.nextInt(nOut), 1.0);
        }

        MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp())
                        .dataType(DataType.DOUBLE)
                        .seed(12345L)
                        .dist(new NormalDistribution(0, 2)).list()
                        .layer(0, new ConvolutionLayer.Builder().nOut(6).kernelSize(2, 2).stride(1, 1)
                                        .activation(Activation.TANH).build())
                        .layer(1, new LocalResponseNormalization.Builder().build())
                        .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                        .activation(Activation.SOFTMAX).nOut(nOut).build())
                        .setInputType(InputType.convolutional(hw, hw, depth));

        MultiLayerNetwork mln = new MultiLayerNetwork(builder.build());
        mln.init();

//        if (PRINT_RESULTS) {
//            for (int j = 0; j < mln.getnLayers(); j++)
//                System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
//        }

        boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

        assertTrue(gradOK);
        TestUtils.testModelSerialization(mln);
    }
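
Two conventions in this and the other gradient-check tests are worth calling out: dataType(DataType.DOUBLE), because numerical gradient checks need double precision to keep the relative error meaningful, and updater(new NoOp()), so parameters are not modified between the analytic and numerical gradient computations.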
 
Example 11
Source File: ParallelWrapperTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testParallelWrapperRun() throws Exception {

    int nChannels = 1;
    int outputNum = 10;

    // for GPUs you usually want a higher batchSize
    int batchSize = 128;
    int nEpochs = 5;
    int seed = 123;

    log.info("Load data....");
    DataSetIterator mnistTrain = new EarlyTerminationDataSetIterator(new MnistDataSetIterator(batchSize, true, 12345), 15);
    DataSetIterator mnistTest = new EarlyTerminationDataSetIterator(new MnistDataSetIterator(batchSize, false, 12345), 4);

    assertTrue(mnistTrain.hasNext());
    val t0 = mnistTrain.next();

    log.info("F: {}; L: {};", t0.getFeatures().shape(), t0.getLabels().shape());

    log.info("Build model....");
    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed)
                    .l2(0.0005)
                    //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75)
                    .weightInit(WeightInit.XAVIER)
                    .updater(new Nesterovs(0.01, 0.9)).list()
                    .layer(0, new ConvolutionLayer.Builder(5, 5)
                                    //nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied
                                    .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build())
                    .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
                                    .stride(2, 2).build())
                    .layer(2, new ConvolutionLayer.Builder(5, 5)
                                    //Note that nIn need not be specified in later layers
                                    .stride(1, 1).nOut(50).activation(Activation.IDENTITY).build())
                    .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
                                    .stride(2, 2).build())
                    .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build())
                    .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                                    .nOut(outputNum).activation(Activation.SOFTMAX).build())
                    .setInputType(InputType.convolutionalFlat(28, 28, nChannels));

    MultiLayerConfiguration conf = builder.build();
    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();

    // ParallelWrapper will take care of load balancing between GPUs.
    ParallelWrapper wrapper = new ParallelWrapper.Builder(model)
                    // DataSets prefetching options. Set this value with respect to number of actual devices
                    .prefetchBuffer(24)

                    // set number of workers equal to or higher than the number of available devices. x1-x2 are good values to start with
                    .workers(2)

                    // infrequent averaging improves performance, but might reduce model accuracy
                    .averagingFrequency(3)

                    // if set to TRUE, the model score will be reported after each averaging round
                    .reportScoreAfterAveraging(true)

                    // optional parameter, set to false ONLY if your system supports P2P memory access across PCIe (hint: AWS does not support P2P)
                    .build();

    log.info("Train model....");
    model.setListeners(new ScoreIterationListener(100));
    long timeX = System.currentTimeMillis();

    // optionally you might want to use MultipleEpochsIterator instead of manually iterating/resetting over your iterator
    //MultipleEpochsIterator mnistMultiEpochIterator = new MultipleEpochsIterator(nEpochs, mnistTrain);

    for (int i = 0; i < nEpochs; i++) {
        long time1 = System.currentTimeMillis();

        // Please note: we're feeding ParallelWrapper with iterator, not model directly
        //            wrapper.fit(mnistMultiEpochIterator);
        wrapper.fit(mnistTrain);
        long time2 = System.currentTimeMillis();
        log.info("*** Completed epoch {}, time: {} ***", i, (time2 - time1));
    }
    long timeY = System.currentTimeMillis();
    log.info("*** Training complete, time: {} ***", (timeY - timeX));

    Evaluation eval = model.evaluate(mnistTest);
    log.info(eval.stats());
    mnistTest.reset();

    double acc = eval.accuracy();
    assertTrue(String.valueOf(acc), acc > 0.5);

    wrapper.shutdown();
}
 
Example 12
Source File: NeuralNetworks.java    From Machine-Learning-in-Java with MIT License
private static MultiLayerNetwork deepConvNetwork(int seed, int iterations,
		int numRows, int numColumns, int outputNum) {
	MultiLayerConfiguration.Builder conf = new NeuralNetConfiguration.Builder()
			.seed(seed)
			.iterations(iterations)
			.activation("sigmoid")
			.weightInit(WeightInit.DISTRIBUTION)
			.dist(new NormalDistribution(0.0, 0.01))
			// .learningRate(7*10e-5)
			.learningRate(1e-3)
			.learningRateScoreBasedDecayRate(1e-1)
			.optimizationAlgo(
					OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
			.list(7)
			.layer(0,
					new ConvolutionLayer.Builder(new int[] { 5, 5 },
							new int[] { 1, 1 }).name("cnn1")
							.nIn(numRows * numColumns).nOut(6).build())
			.layer(1,
					new SubsamplingLayer.Builder(
							SubsamplingLayer.PoolingType.MAX, new int[] {
									2, 2 }, new int[] { 2, 2 }).name(
							"maxpool1").build())
			.layer(2,
					new ConvolutionLayer.Builder(new int[] { 5, 5 },
							new int[] { 1, 1 }).name("cnn2").nOut(16)
							.biasInit(1).build())
			.layer(3,
					new SubsamplingLayer.Builder(
							SubsamplingLayer.PoolingType.MAX, new int[] {
									2, 2 }, new int[] { 2, 2 }).name(
							"maxpool2").build())
			.layer(4,
					new DenseLayer.Builder().name("ffn1").nOut(120).build())
			.layer(5,
					new DenseLayer.Builder().name("ffn2").nOut(84).build())
			.layer(6,
					new OutputLayer.Builder(
							LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
							.name("output").nOut(outputNum)
							.activation("softmax") // radial basis function
													// required
							.build()).backprop(true).pretrain(false)
			.cnnInputSize(numRows, numColumns, 1);

	MultiLayerNetwork model = new MultiLayerNetwork(conf.build());

	return model;
}
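
Note that this example targets a pre-1.0 DL4J API: iterations(...), learningRate(...), learningRateScoreBasedDecayRate(...), list(int), backprop(...)/pretrain(...), and cnnInputSize(...) were all removed in later releases. A rough, hedged sketch of how the same configuration would start in the newer API used by the other examples on this page (the score-based learning-rate decay has no direct one-line replacement and is omitted here):

MultiLayerConfiguration.Builder conf = new NeuralNetConfiguration.Builder()
        .seed(seed)
        .activation(Activation.SIGMOID)          // Activation enum instead of the "sigmoid" string
        .dist(new NormalDistribution(0.0, 0.01)) // replaces weightInit(WeightInit.DISTRIBUTION) + dist(...)
        .updater(new Sgd(1e-3))                  // replaces learningRate(1e-3); iterations(...) is gone
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .list()                                  // no layer-count argument anymore
        // layers 0..6 as above, with Activation enums; backprop(true)/pretrain(false) are simply dropped
        .setInputType(InputType.convolutionalFlat(numRows, numColumns, 1)); // replaces cnnInputSize(...)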
 
Example 13
Source File: CuDNNGradientChecks.java    From deeplearning4j with Apache License 2.0
@Test
public void testLRN() throws Exception {

    Nd4j.getRandom().setSeed(12345);
    int minibatch = 10;
    int depth = 6;
    int hw = 5;
    int nOut = 4;
    INDArray input = Nd4j.rand(new int[] {minibatch, depth, hw, hw});
    INDArray labels = Nd4j.zeros(minibatch, nOut);
    Random r = new Random(12345);
    for (int i = 0; i < minibatch; i++) {
        labels.putScalar(i, r.nextInt(nOut), 1.0);
    }

    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp())
                    .dataType(DataType.DOUBLE)
                    .seed(12345L)
                    .dist(new NormalDistribution(0, 2)).list()
                    .layer(0, new ConvolutionLayer.Builder().nOut(6).kernelSize(2, 2).stride(1, 1)
                                    .activation(Activation.TANH).build())
                    .layer(1, new LocalResponseNormalization.Builder().build())
                    .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .activation(Activation.SOFTMAX).nOut(nOut).build())
                    .setInputType(InputType.convolutional(hw, hw, depth));

    MultiLayerNetwork mln = new MultiLayerNetwork(builder.build());
    mln.init();

    Field f = org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization.class
                    .getDeclaredField("helper");
    f.setAccessible(true);

    org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization l =
                    (org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization) mln.getLayer(1);
    LocalResponseNormalizationHelper lrn = (LocalResponseNormalizationHelper) f.get(l);
    assertTrue(lrn instanceof CudnnLocalResponseNormalizationHelper);

    //-------------------------------
    //For debugging/comparison to no-cudnn case: set helper field to null
    //        f.set(l, null);
    //        assertNull(f.get(l));
    //-------------------------------

    if (PRINT_RESULTS) {
        for (int j = 0; j < mln.getnLayers(); j++)
            System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
    }

    boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

    assertTrue(gradOK);
}
 
Example 14
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
    @Test
    public void testGradientCNNL1L2MLN() {
        if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format...
            return;

        //Parameterized test, testing combinations of:
        // (a) activation function
        // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
        // (c) Loss function (with specified output activations)

        DataSet ds = new IrisDataSetIterator(150, 150).next();
        ds.normalizeZeroMeanZeroUnitVariance();
        INDArray input = ds.getFeatures();
        INDArray labels = ds.getLabels();

        //use l2vals[i] with l1vals[i]
        double[] l2vals = {0.4, 0.0, 0.4, 0.4};
        double[] l1vals = {0.0, 0.0, 0.5, 0.0};
        double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
        double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
        Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS};
        boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first

        LossFunctions.LossFunction[] lossFunctions =
                {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here

        for( int i=0; i<l2vals.length; i++ ){
            Activation afn = activFns[i];
            boolean doLearningFirst = characteristic[i];
            LossFunctions.LossFunction lf = lossFunctions[i];
            Activation outputActivation = outputActivations[i];
            double l2 = l2vals[i];
            double l1 = l1vals[i];

            MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                    .dataType(DataType.DOUBLE)
                    .l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i])
                    .optimizationAlgo(
                            OptimizationAlgorithm.CONJUGATE_GRADIENT)
                    .seed(12345L).list()
                    .layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6)
                            .weightInit(WeightInit.XAVIER).activation(afn)
                            .updater(new NoOp()).build())
                    .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
                            .weightInit(WeightInit.XAVIER).updater(new NoOp()).build())

                    .setInputType(InputType.convolutionalFlat(1, 4, 1));

            MultiLayerConfiguration conf = builder.build();

            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
            mln.init();
            String testName = new Object() {
            }.getClass().getEnclosingMethod().getName();

            if (doLearningFirst) {
                //Run a number of iterations of learning
                mln.setInput(ds.getFeatures());
                mln.setLabels(ds.getLabels());
                mln.computeGradientAndScore();
                double scoreBefore = mln.score();
                for (int j = 0; j < 10; j++)
                    mln.fit(ds);
                mln.computeGradientAndScore();
                double scoreAfter = mln.score();
                //Can't test in 'characteristic mode of operation' if not learning
                String msg = testName
                        + "- score did not (sufficiently) decrease during learning - activationFn="
                        + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                        + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
                        + ", scoreAfter=" + scoreAfter + ")";
                assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
            }

            if (PRINT_RESULTS) {
                System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf
                        + ", outputActivation=" + outputActivation + ", doLearningFirst="
                        + doLearningFirst);
//                for (int j = 0; j < mln.getnLayers(); j++)
//                    System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
            }

            boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

            assertTrue(gradOK);
            TestUtils.testModelSerialization(mln);
        }
    }
 
Example 15
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testGradientCNNMLN() {
    //Parameterized test, testing combinations of:
    // (a) activation function
    // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
    // (c) Loss function (with specified output activations)
    Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
    boolean[] characteristic = {false, true}; //If true: run some backprop steps first

    LossFunctions.LossFunction[] lossFunctions =
            {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
    Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here

    DataSet ds = new IrisDataSetIterator(150, 150).next();
    ds.normalizeZeroMeanZeroUnitVariance();
    INDArray input = ds.getFeatures();
    INDArray labels = ds.getLabels();

    for (Activation afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                LossFunctions.LossFunction lf = lossFunctions[i];
                Activation outputActivation = outputActivations[i];

                MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                        .dataType(DataType.DOUBLE)
                        .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp())
                        .weightInit(WeightInit.XAVIER).seed(12345L).list()
                        .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn)
                                .cudnnAllowFallback(false)
                                .build())
                        .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build())
                        .setInputType(InputType.convolutionalFlat(1, 4, 1));

                MultiLayerConfiguration conf = builder.build();

                MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                mln.init();
                String name = new Object() {
                }.getClass().getEnclosingMethod().getName();

                if (doLearningFirst) {
                    //Run a number of iterations of learning
                    mln.setInput(ds.getFeatures());
                    mln.setLabels(ds.getLabels());
                    mln.computeGradientAndScore();
                    double scoreBefore = mln.score();
                    for (int j = 0; j < 10; j++)
                        mln.fit(ds);
                    mln.computeGradientAndScore();
                    double scoreAfter = mln.score();
                    //Can't test in 'characteristic mode of operation' if not learning
                    String msg = name + " - score did not (sufficiently) decrease during learning - activationFn="
                            + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                            + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
                            + ", scoreAfter=" + scoreAfter + ")";
                    assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
                }

                if (PRINT_RESULTS) {
                    System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation="
                            + outputActivation + ", doLearningFirst=" + doLearningFirst);
                }

                boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                assertTrue(gradOK);
                TestUtils.testModelSerialization(mln);
            }
        }
    }
}
 
Example 16
Source File: SymmetricTrainerContextTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testEqualUuid1() {
    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed)
            .l2(0.0005)
            //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75)
            .weightInit(WeightInit.XAVIER)
            .updater(new Nesterovs(0.01, 0.9)).list()
            .layer(0, new ConvolutionLayer.Builder(5, 5)
                    //nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied
                    .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build())
            .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
                    .stride(2, 2).build())
            .layer(2, new ConvolutionLayer.Builder(5, 5)
                    //Note that nIn need not be specified in later layers
                    .stride(1, 1).nOut(50).activation(Activation.IDENTITY).build())
            .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
                    .stride(2, 2).build())
            .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build())
            .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                    .nOut(outputNum).activation(Activation.SOFTMAX).build())
            .setInputType(InputType.convolutionalFlat(28, 28, nChannels));

    MultiLayerConfiguration conf = builder.build();
    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();

    // ParallelWrapper will take care of load balancing between GPUs.
    ParallelWrapper wrapper = new ParallelWrapper.Builder(model)
            // DataSets prefetching options. Set this value with respect to number of actual devices
            .prefetchBuffer(24)

            // set number of workers equal to or higher than the number of available devices. x1-x2 are good values to start with
            .workers(2)

            // infrequent averaging improves performance, but might reduce model accuracy
            .averagingFrequency(3)

            // if set to TRUE, the model score will be reported after each averaging round
            .reportScoreAfterAveraging(true)

            // optional parameter, set to false ONLY if your system supports P2P memory access across PCIe (hint: AWS does not support P2P)
            .build();

    val trainer = new SymmetricTrainer(model, "alpha", 3, WorkspaceMode.NONE, wrapper, true);

    assertEquals("alpha_thread_3", trainer.getUuid());
}
 
Example 17
Source File: ParallelWrapperMainTest.java    From deeplearning4j with Apache License 2.0
@Test
public void runParallelWrapperMain() throws Exception {

    int nChannels = 1;
    int outputNum = 10;

    // for GPUs you usually want a higher batchSize
    int batchSize = 128;
    int seed = 123;
    int uiPort = 9500;
    System.setProperty("org.deeplearning4j.ui.port", String.valueOf(uiPort));
    log.info("Load data....");
    DataSetIterator mnistTrain = new MnistDataSetIterator(batchSize, true, 12345);
    DataSetIterator mnistTest = new MnistDataSetIterator(batchSize, false, 12345);

    log.info("Build model....");
    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed)
                    .l2(0.0005)
                    .weightInit(WeightInit.XAVIER)
                    .updater(new Nesterovs(0.01, 0.9)).list()
                    .layer(0, new ConvolutionLayer.Builder(5, 5)
                                    //nIn and nOut specify channels. nIn here is the nChannels and nOut is the number of filters to be applied
                                    .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build())
                    .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
                                    .stride(2, 2).build())
                    .layer(2, new ConvolutionLayer.Builder(5, 5)
                                    //Note that nIn need not be specified in later layers
                                    .stride(1, 1).nOut(50).activation(Activation.IDENTITY).build())
                    .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
                                    .stride(2, 2).build())
                    .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build())
                    .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                                    .nOut(outputNum).activation(Activation.SOFTMAX).build())
                    .setInputType(InputType.convolutionalFlat(28, 28, nChannels));

    MultiLayerConfiguration conf = builder.build();
    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();
    File tempModel = testDir.newFile("tmpmodel.zip");
    tempModel.deleteOnExit();
    ModelSerializer.writeModel(model, tempModel, false);
    File tmp = testDir.newFile("tmpmodel.bin");
    tmp.deleteOnExit();
    ParallelWrapperMain parallelWrapperMain = new ParallelWrapperMain();
    try {
        parallelWrapperMain.runMain(new String[]{"--modelPath", tempModel.getAbsolutePath(),
                "--dataSetIteratorFactoryClazz", MnistDataSetIteratorProviderFactory.class.getName(),
                "--modelOutputPath", tmp.getAbsolutePath(), "--uiUrl", "localhost:" + uiPort});
    } finally {
        parallelWrapperMain.stop();
    }
}
 
Example 18
Source File: CuDNNGradientChecks.java    From deeplearning4j with Apache License 2.0
@Test
public void testConvolutional() throws Exception {

    //Parameterized test, testing combinations of:
    // (a) activation function
    // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
    // (c) Loss function (with specified output activations)
    Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
    boolean[] characteristic = {false, true}; //If true: run some backprop steps first

    int[] minibatchSizes = {1, 4};
    int width = 6;
    int height = 6;
    int inputDepth = 2;
    int nOut = 3;

    Field f = org.deeplearning4j.nn.layers.convolution.ConvolutionLayer.class.getDeclaredField("helper");
    f.setAccessible(true);

    Random r = new Random(12345);
    for (Activation afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int minibatchSize : minibatchSizes) {

                INDArray input = Nd4j.rand(new int[] {minibatchSize, inputDepth, height, width});
                INDArray labels = Nd4j.zeros(minibatchSize, nOut);
                for (int i = 0; i < minibatchSize; i++) {
                    labels.putScalar(i, r.nextInt(nOut), 1.0);
                }

                MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                        .dataType(DataType.DOUBLE)
                                .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                                .dist(new UniformDistribution(-1, 1))
                                .updater(new NoOp()).seed(12345L).list()
                                .layer(0, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(1, 1).nOut(3)
                                                .activation(afn).build())
                                .layer(1, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(0, 0).nOut(3)
                                                .activation(afn).build())
                                .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                                .activation(Activation.SOFTMAX).nOut(nOut).build())
                                .setInputType(InputType.convolutional(height, width, inputDepth))
                                ;

                MultiLayerConfiguration conf = builder.build();

                MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                mln.init();

                org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c0 =
                                (org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) mln.getLayer(0);
                ConvolutionHelper ch0 = (ConvolutionHelper) f.get(c0);
                assertTrue(ch0 instanceof CudnnConvolutionHelper);

                org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c1 =
                                (org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) mln.getLayer(1);
                ConvolutionHelper ch1 = (ConvolutionHelper) f.get(c1);
                assertTrue(ch1 instanceof CudnnConvolutionHelper);

                //-------------------------------
                //For debugging/comparison to no-cudnn case: set helper field to null
                //                    f.set(c0, null);
                //                    f.set(c1, null);
                //                    assertNull(f.get(c0));
                //                    assertNull(f.get(c1));
                //-------------------------------


                String name = new Object() {}.getClass().getEnclosingMethod().getName();

                if (doLearningFirst) {
                    //Run a number of iterations of learning
                    mln.setInput(input);
                    mln.setLabels(labels);
                    mln.computeGradientAndScore();
                    double scoreBefore = mln.score();
                    for (int j = 0; j < 10; j++)
                        mln.fit(input, labels);
                    mln.computeGradientAndScore();
                    double scoreAfter = mln.score();
                    //Can't test in 'characteristic mode of operation' if not learning
                    String msg = name + " - score did not (sufficiently) decrease during learning - activationFn="
                                    + afn + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
                                    + ", scoreAfter=" + scoreAfter + ")";
                    assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
                }

                if (PRINT_RESULTS) {
                    System.out.println(name + " - activationFn=" + afn + ", doLearningFirst=" + doLearningFirst);
                    for (int j = 0; j < mln.getnLayers(); j++)
                        System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                }

                boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                assertTrue(gradOK);
            }
        }
    }
}
 
Example 19
Source File: BNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
    @Test
    public void testGradientCnnSimple() {
        Nd4j.getRandom().setSeed(12345);
        int minibatch = 10;
        int depth = 1;
        int hw = 4;
        int nOut = 4;
        INDArray input = Nd4j.rand(new int[]{minibatch, depth, hw, hw});
        INDArray labels = Nd4j.zeros(minibatch, nOut);
        Random r = new Random(12345);
        for (int i = 0; i < minibatch; i++) {
            labels.putScalar(i, r.nextInt(nOut), 1.0);
        }

        for (boolean useLogStd : new boolean[]{true, false}) {
            MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                    .dataType(DataType.DOUBLE)
                    .updater(new NoOp()).seed(12345L)
                    .dist(new NormalDistribution(0, 2)).list()
                    .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2)
                            .activation(Activation.IDENTITY).build())
                    .layer(1, new BatchNormalization.Builder().useLogStd(useLogStd).build())
                    .layer(2, new ActivationLayer.Builder().activation(Activation.TANH).build())
                    .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                            .activation(Activation.SOFTMAX).nOut(nOut).build())
                    .setInputType(InputType.convolutional(hw, hw, depth));

            MultiLayerNetwork mln = new MultiLayerNetwork(builder.build());
            mln.init();

//            for (int j = 0; j < mln.getnLayers(); j++)
//                System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());

            //Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc
            //i.e., runningMean = decay * runningMean + (1-decay) * batchMean
            //However, numerical gradient will be 0 as forward pass doesn't depend on this "parameter"
            Set<String> excludeParams = new HashSet<>(Arrays.asList("1_mean", "1_var", "1_log10stdev"));
            boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(mln).input(input)
                    .labels(labels).excludeParams(excludeParams));

            assertTrue(gradOK);
            TestUtils.testModelSerialization(mln);
        }
    }
 
Example 20
Source File: BNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
    @Test
    public void testGradient2dSimple() {
        DataNormalization scaler = new NormalizerMinMaxScaler();
        DataSetIterator iter = new IrisDataSetIterator(150, 150);
        scaler.fit(iter);
        iter.setPreProcessor(scaler);
        DataSet ds = iter.next();
        INDArray input = ds.getFeatures();
        INDArray labels = ds.getLabels();

        for (boolean useLogStd : new boolean[]{true, false}) {

            MultiLayerConfiguration.Builder builder =
                    new NeuralNetConfiguration.Builder().updater(new NoOp())
                            .dataType(DataType.DOUBLE)
                            .seed(12345L)
                            .dist(new NormalDistribution(0, 1)).list()
                            .layer(0, new DenseLayer.Builder().nIn(4).nOut(3)
                                    .activation(Activation.IDENTITY).build())
                            .layer(1, new BatchNormalization.Builder().useLogStd(useLogStd).nOut(3).build())
                            .layer(2, new ActivationLayer.Builder().activation(Activation.TANH).build())
                            .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .activation(Activation.SOFTMAX).nIn(3).nOut(3).build());

            MultiLayerNetwork mln = new MultiLayerNetwork(builder.build());
            mln.init();

//            for (int j = 0; j < mln.getnLayers(); j++)
//                System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());

            //Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc
            //i.e., runningMean = decay * runningMean + (1-decay) * batchMean
            //However, numerical gradient will be 0 as forward pass doesn't depend on this "parameter"
            Set<String> excludeParams = new HashSet<>(Arrays.asList("1_mean", "1_var", "1_log10stdev"));
            boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(mln).input(input)
                    .labels(labels).excludeParams(excludeParams));

            assertTrue(gradOK);
            TestUtils.testModelSerialization(mln);
        }
    }