Java Code Examples for org.deeplearning4j.nn.multilayer.MultiLayerNetwork#setLabels()

The following examples show how to use org.deeplearning4j.nn.multilayer.MultiLayerNetwork#setLabels(). All of them are taken from the deeplearning4j project's test suite; the source file for each example is noted above it.
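
Before the full test examples, here is a minimal sketch of the usage pattern most of them share: setLabels() is usually called together with setInput(), after which computeGradientAndScore() computes the loss and gradients against those labels. The layer sizes, data shapes, and random values below are illustrative assumptions rather than code from any of the examples; imports are omitted, matching the snippet style used throughout.

// Minimal sketch (assumed sizes and shapes, not taken from any example below)
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .list()
        .layer(new DenseLayer.Builder().nIn(4).nOut(10).activation(Activation.TANH).build())
        .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                .activation(Activation.SOFTMAX).nIn(10).nOut(3).build())
        .build();

MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();

INDArray features = Nd4j.rand(5, 4);    // 5 examples, 4 features each
INDArray labels = Nd4j.zeros(5, 3);     // one-hot labels for 3 classes
for (int i = 0; i < 5; i++) {
    labels.putScalar(i, i % 3, 1.0);
}

net.setInput(features);
net.setLabels(labels);                  // labels must match the output layer's expected shape
net.computeGradientAndScore();          // computes loss and gradients for the input/labels set above

double score = net.score();
INDArray gradients = net.getFlattenedGradients();
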
Example 1
Source File: WorkspaceTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testWithPreprocessorsMLN() {
    for (WorkspaceMode wm : WorkspaceMode.values()) {
        System.out.println(wm);
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .trainingWorkspaceMode(wm)
                .inferenceWorkspaceMode(wm)
                .list()
                .layer(new GravesLSTM.Builder().nIn(10).nOut(5).build())
                .layer(new GravesLSTM.Builder().nIn(5).nOut(8).build())
                .layer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.SIGMOID).nOut(3).build())
                .inputPreProcessor(0, new DupPreProcessor())
                .setInputType(InputType.recurrent(10))
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();


        INDArray input = Nd4j.zeros(1, 10, 5);

        for (boolean train : new boolean[]{false, true}) {
            net.clear();
            net.feedForward(input, train);
        }

        net.setInput(input);
        net.setLabels(Nd4j.rand(new int[]{1, 3, 5}));
        net.computeGradientAndScore();
    }
}
 
Example 2
Source File: RegressionTest100a.java    From deeplearning4j with Apache License 2.0
@Test
public void testUpsampling2d() throws Exception {

    File f = Resources.asFile("regression_testing/100a/upsampling/net.bin");
    MultiLayerNetwork net = MultiLayerNetwork.load(f, true);

    INDArray in;
    File fIn = Resources.asFile("regression_testing/100a/upsampling/in.bin");
    try(DataInputStream dis = new DataInputStream(new FileInputStream(fIn))){
        in = Nd4j.read(dis);
    }

    INDArray label;
    File fLabels = Resources.asFile("regression_testing/100a/upsampling/labels.bin");
    try(DataInputStream dis = new DataInputStream(new FileInputStream(fLabels))){
        label = Nd4j.read(dis);
    }

    INDArray outExp;
    File fOutExp = Resources.asFile("regression_testing/100a/upsampling/out.bin");
    try(DataInputStream dis = new DataInputStream(new FileInputStream(fOutExp))){
        outExp = Nd4j.read(dis);
    }

    INDArray gradExp;
    File fGradExp = Resources.asFile("regression_testing/100a/upsampling/gradient.bin");
    try(DataInputStream dis = new DataInputStream(new FileInputStream(fGradExp))){
        gradExp = Nd4j.read(dis);
    }

    INDArray out = net.output(in, false);
    assertEquals(outExp, out);

    net.setInput(in);
    net.setLabels(label);
    net.computeGradientAndScore();

    INDArray grad = net.getFlattenedGradients();
    assertEquals(gradExp, grad);
}
 
Example 3
Source File: ValidateCudnnLSTM.java    From deeplearning4j with Apache License 2.0
@Test
public void validateImplMultiLayer() throws Exception {

    Nd4j.getRandom().setSeed(12345);
    int minibatch = 10;
    int inputSize = 3;
    int lstmLayerSize = 4;
    int timeSeriesLength = 3;
    int nOut = 2;
    INDArray input = Nd4j.rand(new int[] {minibatch, inputSize, timeSeriesLength});
    INDArray labels = Nd4j.zeros(minibatch, nOut, timeSeriesLength);
    Random r = new Random(12345);
    for (int i = 0; i < minibatch; i++) {
        for (int j = 0; j < timeSeriesLength; j++) {
            labels.putScalar(i, r.nextInt(nOut), j, 1.0);
        }
    }

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp())
                    .dataType(DataType.DOUBLE)
                    .inferenceWorkspaceMode(WorkspaceMode.NONE).trainingWorkspaceMode(WorkspaceMode.NONE)
                    .seed(12345L)
                    .dist(new NormalDistribution(0, 2)).list()
                    .layer(0, new LSTM.Builder().nIn(input.size(1)).nOut(lstmLayerSize)
                                    .gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build())
                    .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize)
                                    .gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build())
                    .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .activation(Activation.SOFTMAX).nIn(lstmLayerSize).nOut(nOut).build())
                    .build();

    MultiLayerNetwork mln1 = new MultiLayerNetwork(conf.clone());
    mln1.init();

    MultiLayerNetwork mln2 = new MultiLayerNetwork(conf.clone());
    mln2.init();


    assertEquals(mln1.params(), mln2.params());

    Field f = org.deeplearning4j.nn.layers.recurrent.LSTM.class.getDeclaredField("helper");
    f.setAccessible(true);

    Layer l0 = mln1.getLayer(0);
    Layer l1 = mln1.getLayer(1);
    f.set(l0, null);
    f.set(l1, null);
    assertNull(f.get(l0));
    assertNull(f.get(l1));

    l0 = mln2.getLayer(0);
    l1 = mln2.getLayer(1);
    assertTrue(f.get(l0) instanceof CudnnLSTMHelper);
    assertTrue(f.get(l1) instanceof CudnnLSTMHelper);


    INDArray out1 = mln1.output(input);
    INDArray out2 = mln2.output(input);

    assertEquals(out1, out2);

    for (int x = 0; x < 10; x++) {
        input = Nd4j.rand(new int[] {minibatch, inputSize, timeSeriesLength});
        labels = Nd4j.zeros(minibatch, nOut, timeSeriesLength);
        for (int i = 0; i < minibatch; i++) {
            for (int j = 0; j < timeSeriesLength; j++) {
                labels.putScalar(i, r.nextInt(nOut), j, 1.0);
            }
        }

        mln1.setInput(input);
        mln1.setLabels(labels);

        mln2.setInput(input);
        mln2.setLabels(labels);

        mln1.computeGradientAndScore();
        mln2.computeGradientAndScore();

        assertEquals(mln1.score(), mln2.score(), 1e-5);

        assertEquals(mln1.getFlattenedGradients(), mln2.getFlattenedGradients());

        mln1.fit(new DataSet(input, labels));
        mln2.fit(new DataSet(input, labels));

        assertEquals("Iteration: " + x, mln1.params(), mln2.params());
    }
}
 
Example 4
Source File: DTypeTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testDtypesModelVsGlobalDtypeCnn1d() {
    //Nd4jCpu.Environment.getInstance().setUseMKLDNN(false);

    for (DataType globalDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
        Nd4j.setDefaultDataTypes(globalDtype, globalDtype);
        for (DataType networkDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
            for (int outputLayer = 0; outputLayer < 3; outputLayer++) {
                assertEquals(globalDtype, Nd4j.dataType());
                assertEquals(globalDtype, Nd4j.defaultFloatingPointType());

                String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype + ", outputLayer=" + outputLayer;

                Layer ol;
                Layer secondLast;
                switch (outputLayer) {
                    case 0:
                        ol = new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build();
                        secondLast = new GlobalPoolingLayer(PoolingType.MAX);
                        break;
                    case 1:
                        ol = new RnnOutputLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).nOut(5).build();
                        secondLast = new Convolution1D.Builder().kernelSize(2).nOut(5).build();
                        break;
                    case 2:
                        ol = new RnnLossLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build();
                        secondLast = new Convolution1D.Builder().kernelSize(2).nOut(5).build();
                        break;
                    default:
                        throw new RuntimeException();
                }


                MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                        .trainingWorkspaceMode(WorkspaceMode.NONE)
                        .inferenceWorkspaceMode(WorkspaceMode.NONE)
                        .dataType(networkDtype)
                        .convolutionMode(ConvolutionMode.Same)
                        .updater(new Adam(1e-2))
                        .list()
                        .layer(new Convolution1D.Builder().kernelSize(2).stride(1).nOut(3).activation(Activation.TANH).build())
                        .layer(new Subsampling1DLayer.Builder().poolingType(PoolingType.MAX).kernelSize(5).stride(1).build())
                        .layer(new Cropping1D.Builder(1).build())
                        .layer(new ZeroPadding1DLayer(1))
                        .layer(new Upsampling1D.Builder(2).build())
                        .layer(secondLast)
                        .layer(ol)
                        .setInputType(InputType.recurrent(5, 10))
                        .build();

                MultiLayerNetwork net = new MultiLayerNetwork(conf);
                net.init();

                net.initGradientsView();
                assertEquals(msg, networkDtype, net.params().dataType());
                assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType());
                assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType());

                INDArray in = Nd4j.rand(networkDtype, 2, 5, 10);
                INDArray label;
                if (outputLayer == 0) {
                    //OutputLayer
                    label = TestUtils.randomOneHot(2, 10).castTo(networkDtype);
                } else {
                    //RnnOutputLayer, RnnLossLayer
                    label = Nd4j.rand(networkDtype, 2, 5, 20);   //Longer sequence due to upsampling
                }

                INDArray out = net.output(in);
                assertEquals(msg, networkDtype, out.dataType());
                List<INDArray> ff = net.feedForward(in);
                for (int i = 0; i < ff.size(); i++) {
                    String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName());
                    assertEquals(s, networkDtype, ff.get(i).dataType());
                }

                net.setInput(in);
                net.setLabels(label);
                net.computeGradientAndScore();

                net.fit(new DataSet(in, label));

                logUsedClasses(net);

                //Now, test mismatched dtypes for input/labels:
                for (DataType inputLabelDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
                    System.out.println(msg + " - " + inputLabelDtype);
                    INDArray in2 = in.castTo(inputLabelDtype);
                    INDArray label2 = label.castTo(inputLabelDtype);
                    net.output(in2);
                    net.setInput(in2);
                    net.setLabels(label2);
                    net.computeGradientAndScore();

                    net.fit(new DataSet(in2, label2));
                }
            }
        }
    }
}
 
Example 5
Source File: EmbeddingLayerTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testEmbeddingSequenceLayerWithMasking() {
    //Idea: have masking on the input with an embedding and dense layers on input
    //Ensure that the parameter gradients for the inputs don't depend on the inputs when inputs are masked

    int[] miniBatchSizes = {1, 3};
    int nIn = 2;
    Random r = new Random(12345);

    int numInputClasses = 10;
    int timeSeriesLength = 5;

    for (DataType maskDtype : new DataType[]{DataType.FLOAT, DataType.DOUBLE, DataType.INT}) {
        for (DataType inLabelDtype : new DataType[]{DataType.FLOAT, DataType.DOUBLE, DataType.INT}) {
            for(int inputRank : new int[]{2, 3}) {
                for (int nExamples : miniBatchSizes) {
                    Nd4j.getRandom().setSeed(12345);

                    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                            .updater(new Sgd(0.1)).seed(12345).list()
                            .layer(0, new EmbeddingSequenceLayer.Builder().hasBias(true).activation(Activation.TANH).nIn(numInputClasses)
                                    .nOut(5).build())
                            .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build())
                            .layer(2, new LSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).build())
                            .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3)
                                    .nOut(4).build())
                            .setInputType(InputType.recurrent(1)).build();

                    MultiLayerNetwork net = new MultiLayerNetwork(conf);
                    net.init();

                    MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
                            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                            .updater(new Sgd(0.1)).seed(12345).list()
                            .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(numInputClasses).nOut(5)
                                    .build())
                            .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build())
                            .layer(2, new LSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).build())
                            .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3)
                                    .nOut(4).build())
                            .setInputType(InputType.recurrent(1)).build();

                    MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
                    net2.init();

                    net2.setParams(net.params().dup());

                    INDArray inEmbedding = Nd4j.zeros(inLabelDtype, inputRank == 2 ? new long[]{nExamples, timeSeriesLength} : new long[]{nExamples, 1, timeSeriesLength});
                    INDArray inDense = Nd4j.zeros(inLabelDtype, nExamples, numInputClasses, timeSeriesLength);

                    INDArray labels = Nd4j.zeros(inLabelDtype, nExamples, 4, timeSeriesLength);

                    for (int i = 0; i < nExamples; i++) {
                        for (int j = 0; j < timeSeriesLength; j++) {
                            int inIdx = r.nextInt(numInputClasses);
                            inEmbedding.putScalar(inputRank == 2 ? new int[]{i, j} : new int[]{i, 0, j}, inIdx);
                            inDense.putScalar(new int[]{i, inIdx, j}, 1.0);

                            int outIdx = r.nextInt(4);
                            labels.putScalar(new int[]{i, outIdx, j}, 1.0);
                        }
                    }

                    INDArray inputMask = Nd4j.zeros(maskDtype, nExamples, timeSeriesLength);
                    for (int i = 0; i < nExamples; i++) {
                        for (int j = 0; j < timeSeriesLength; j++) {
                            inputMask.putScalar(new int[]{i, j}, (r.nextBoolean() ? 1.0 : 0.0));
                        }
                    }

                    net.setLayerMaskArrays(inputMask, null);
                    net2.setLayerMaskArrays(inputMask, null);
                    List<INDArray> actEmbedding = net.feedForward(inEmbedding, false);
                    List<INDArray> actDense = net2.feedForward(inDense, false);
                    for (int i = 2; i < actEmbedding.size(); i++) { //Start from layer 2: EmbeddingSequence is 3d, first dense is 2d (before reshape)
                        assertEquals(actDense.get(i), actEmbedding.get(i));
                    }

                    net.setLabels(labels);
                    net2.setLabels(labels);
                    net.computeGradientAndScore();
                    net2.computeGradientAndScore();

                    assertEquals(net2.score(), net.score(), 1e-5);

                    Map<String, INDArray> gradients = net.gradient().gradientForVariable();
                    Map<String, INDArray> gradients2 = net2.gradient().gradientForVariable();
                    assertEquals(gradients.keySet(), gradients2.keySet());
                    for (String s : gradients.keySet()) {
                        assertEquals(gradients2.get(s), gradients.get(s));
                    }
                }
            }
        }
    }
}
 
Example 6
Source File: DTypeTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testDtypesModelVsGlobalDtypeMisc() {
    for (DataType globalDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
        Nd4j.setDefaultDataTypes(globalDtype, globalDtype);
        for (DataType networkDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
            assertEquals(globalDtype, Nd4j.dataType());
            assertEquals(globalDtype, Nd4j.defaultFloatingPointType());

            String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype;


            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .dataType(networkDtype)
                    .convolutionMode(ConvolutionMode.Same)
                    .updater(new Adam(1e-2))
                    .list()
                    .layer(new SpaceToBatchLayer.Builder().blocks(1, 1).build())
                    .layer(new SpaceToDepthLayer.Builder().blocks(2).build())
                    .layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build())
                    .setInputType(InputType.convolutional(28, 28, 5))
                    .build();

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            net.initGradientsView();
            assertEquals(msg, networkDtype, net.params().dataType());
            assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType());
            assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType());

            INDArray in = Nd4j.rand(networkDtype, 2, 5, 28, 28);
            INDArray label = TestUtils.randomOneHot(2, 10).castTo(networkDtype);

            INDArray out = net.output(in);
            assertEquals(msg, networkDtype, out.dataType());
            List<INDArray> ff = net.feedForward(in);
            for (int i = 0; i < ff.size(); i++) {
                String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName());
                assertEquals(s, networkDtype, ff.get(i).dataType());
            }

            net.setInput(in);
            net.setLabels(label);
            net.computeGradientAndScore();

            net.fit(new DataSet(in, label));

            logUsedClasses(net);

            //Now, test mismatched dtypes for input/labels:
            for (DataType inputLabelDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
                INDArray in2 = in.castTo(inputLabelDtype);
                INDArray label2 = label.castTo(inputLabelDtype);
                net.output(in2);
                net.setInput(in2);
                net.setLabels(label2);
                net.computeGradientAndScore();

                net.fit(new DataSet(in2, label2));
            }
        }
    }
}
 
Example 7
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testGradientCNNMLN() {
    //Parameterized test, testing combinations of:
    // (a) activation function
    // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
    // (c) Loss function (with specified output activations)
    Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
    boolean[] characteristic = {false, true}; //If true: run some backprop steps first

    LossFunctions.LossFunction[] lossFunctions =
            {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
    Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here

    DataSet ds = new IrisDataSetIterator(150, 150).next();
    ds.normalizeZeroMeanZeroUnitVariance();
    INDArray input = ds.getFeatures();
    INDArray labels = ds.getLabels();

    for (Activation afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                LossFunctions.LossFunction lf = lossFunctions[i];
                Activation outputActivation = outputActivations[i];

                MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                        .dataType(DataType.DOUBLE)
                        .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp())
                        .weightInit(WeightInit.XAVIER).seed(12345L).list()
                        .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn)
                                .cudnnAllowFallback(false)
                                .build())
                        .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build())
                        .setInputType(InputType.convolutionalFlat(1, 4, 1));

                MultiLayerConfiguration conf = builder.build();

                MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                mln.init();
                String name = new Object() {
                }.getClass().getEnclosingMethod().getName();

                if (doLearningFirst) {
                    //Run a number of iterations of learning
                    mln.setInput(ds.getFeatures());
                    mln.setLabels(ds.getLabels());
                    mln.computeGradientAndScore();
                    double scoreBefore = mln.score();
                    for (int j = 0; j < 10; j++)
                        mln.fit(ds);
                    mln.computeGradientAndScore();
                    double scoreAfter = mln.score();
                    //Can't test in 'characteristic mode of operation' if not learning
                    String msg = name + " - score did not (sufficiently) decrease during learning - activationFn="
                            + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                            + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
                            + ", scoreAfter=" + scoreAfter + ")";
                    assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
                }

                if (PRINT_RESULTS) {
                    System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation="
                            + outputActivation + ", doLearningFirst=" + doLearningFirst);
                }

                boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                assertTrue(gradOK);
                TestUtils.testModelSerialization(mln);
            }
        }
    }
}
 
Example 8
Source File: EmbeddingLayerTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testEmbeddingSequenceBackwardPass() {
        int nClassesIn = 10;
        int embeddingDim = 5;
        int nOut = 4;
        int inputLength = 1;

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list()
                .layer(new EmbeddingSequenceLayer.Builder().inputLength(inputLength)
                        .hasBias(true).nIn(nClassesIn).nOut(embeddingDim).build())
                .layer(new RnnOutputLayer.Builder().nIn(embeddingDim).nOut(nOut).activation(Activation.SOFTMAX).build())
                .build();
        MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list()
                .layer(new DenseLayer.Builder().nIn(nClassesIn).nOut(embeddingDim).activation(Activation.IDENTITY).build())
                .layer(new RnnOutputLayer.Builder().nIn(embeddingDim).nOut(nOut).activation(Activation.SOFTMAX).build())
                .setInputType(InputType.recurrent(nClassesIn))
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
        net.init();
        net2.init();

        net2.setParams(net.params().dup());

        int batchSize = 3;
        INDArray inEmbedding = Nd4j.create(batchSize, 1);
        INDArray inOneHot = Nd4j.create(batchSize, nClassesIn, 1);
        INDArray outLabels = Nd4j.create(batchSize, 4, 1);

        Random r = new Random(1337);
        for (int i = 0; i < batchSize; i++) {
            int classIdx = r.nextInt(nClassesIn);
            inEmbedding.putScalar(i, classIdx);
            inOneHot.putScalar(new int[]{i, classIdx, 0}, 1.0);

            int labelIdx = r.nextInt(4);
            outLabels.putScalar(new int[]{i, labelIdx, 0}, 1.0);
        }

        net.setInput(inEmbedding);
        net2.setInput(inOneHot);
        net.setLabels(outLabels);
        net2.setLabels(outLabels);

        net.computeGradientAndScore();
        net2.computeGradientAndScore();

//        System.out.println(net.score() + "\t" + net2.score());
        assertEquals(net2.score(), net.score(), 1e-6);

        Map<String, INDArray> gradient = net.gradient().gradientForVariable();
        Map<String, INDArray> gradient2 = net2.gradient().gradientForVariable();
        assertEquals(gradient.size(), gradient2.size());

        for (String s : gradient.keySet()) {
            assertEquals(gradient2.get(s), gradient.get(s));
        }
    }
 
Example 9
Source File: EmbeddingLayerTest.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
public void testEmbeddingBackwardPass() {
    //With the same parameters, embedding layer should have same activations as the equivalent one-hot representation
    // input with a DenseLayer

    int nClassesIn = 10;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list()
            .layer(0, new EmbeddingLayer.Builder().hasBias(true).nIn(nClassesIn).nOut(5).build()).layer(1,
                    new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(4)
                            .activation(Activation.SOFTMAX).build())
            .build();
    MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().activation(Activation.TANH)
            .weightInit(WeightInit.XAVIER).list()
            .layer(new DenseLayer.Builder().nIn(nClassesIn).nOut(5).activation(Activation.IDENTITY).build())
            .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(4)
                            .activation(Activation.SOFTMAX).build())
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
    net.init();
    net2.init();

    net2.setParams(net.params().dup());

    int batchSize = 3;
    INDArray inEmbedding = Nd4j.create(batchSize, 1);
    INDArray inOneHot = Nd4j.create(batchSize, nClassesIn);
    INDArray outLabels = Nd4j.create(batchSize, 4);

    Random r = new Random(12345);
    for (int i = 0; i < batchSize; i++) {
        int classIdx = r.nextInt(nClassesIn);
        inEmbedding.putScalar(i, classIdx);
        inOneHot.putScalar(new int[]{i, classIdx}, 1.0);

        int labelIdx = r.nextInt(4);
        outLabels.putScalar(new int[]{i, labelIdx}, 1.0);
    }

    net.setInput(inEmbedding);
    net2.setInput(inOneHot);
    net.setLabels(outLabels);
    net2.setLabels(outLabels);

    net.computeGradientAndScore();
    net2.computeGradientAndScore();

    assertEquals(net2.score(), net.score(), 1e-6);

    Map<String, INDArray> gradient = net.gradient().gradientForVariable();
    Map<String, INDArray> gradient2 = net2.gradient().gradientForVariable();
    assertEquals(gradient.size(), gradient2.size());

    for (String s : gradient.keySet()) {
        assertEquals(gradient2.get(s), gradient.get(s));
    }
}
 
Example 10
Source File: OutputLayerTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testCnnLossLayer(){

    for(WorkspaceMode ws : WorkspaceMode.values()) {
        log.info("*** Testing workspace: " + ws);

        for (Activation a : new Activation[]{Activation.TANH, Activation.SELU}) {
            //Check that (A+identity) is equal to (identity+A), for activation A
            //i.e., should get same output and weight gradients for both

            MultiLayerConfiguration conf1 =
                    new NeuralNetConfiguration.Builder().seed(12345L)
                            .updater(new NoOp())
                            .convolutionMode(ConvolutionMode.Same)
                            .inferenceWorkspaceMode(ws)
                            .trainingWorkspaceMode(ws)
                            .list()
                            .layer(new ConvolutionLayer.Builder().nIn(3).nOut(4).activation(Activation.IDENTITY)
                                    .kernelSize(2, 2).stride(1, 1)
                                    .dist(new NormalDistribution(0, 1.0))
                                    .updater(new NoOp()).build())
                            .layer(new CnnLossLayer.Builder(LossFunction.MSE)
                                    .activation(a)
                                    .build())
                            .build();

            MultiLayerConfiguration conf2 =
                    new NeuralNetConfiguration.Builder().seed(12345L)
                            .updater(new NoOp())
                            .convolutionMode(ConvolutionMode.Same)
                            .inferenceWorkspaceMode(ws)
                            .trainingWorkspaceMode(ws)
                            .list()
                            .layer(new ConvolutionLayer.Builder().nIn(3).nOut(4).activation(a)
                                    .kernelSize(2, 2).stride(1, 1)
                                    .dist(new NormalDistribution(0, 1.0))
                                    .updater(new NoOp()).build())
                            .layer(new CnnLossLayer.Builder(LossFunction.MSE)
                                    .activation(Activation.IDENTITY)
                                    .build())
                            .build();

            MultiLayerNetwork mln = new MultiLayerNetwork(conf1);
            mln.init();

            MultiLayerNetwork mln2 = new MultiLayerNetwork(conf2);
            mln2.init();


            mln2.setParams(mln.params());


            INDArray in = Nd4j.rand(new int[]{3, 3, 5, 5});

            INDArray out1 = mln.output(in);
            INDArray out2 = mln2.output(in);

            assertEquals(out1, out2);

            INDArray labels = Nd4j.rand(out1.shape());

            mln.setInput(in);
            mln.setLabels(labels);

            mln2.setInput(in);
            mln2.setLabels(labels);

            mln.computeGradientAndScore();
            mln2.computeGradientAndScore();

            assertEquals(mln.score(), mln2.score(), 1e-6);
            assertEquals(mln.gradient().gradient(), mln2.gradient().gradient());

            //Also check computeScoreForExamples
            INDArray in2a = Nd4j.rand(new int[]{1, 3, 5, 5});
            INDArray labels2a = Nd4j.rand(new int[]{1, 4, 5, 5});

            INDArray in2 = Nd4j.concat(0, in2a, in2a);
            INDArray labels2 = Nd4j.concat(0, labels2a, labels2a);

            INDArray s = mln.scoreExamples(new DataSet(in2, labels2), false);
            assertArrayEquals(new long[]{2, 1}, s.shape());
            assertEquals(s.getDouble(0), s.getDouble(1), 1e-6);

            TestUtils.testModelSerialization(mln);
        }
    }
}
 
Example 11
Source File: DTypeTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testDtypesModelVsGlobalDtypeRnn() {
    for (DataType globalDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
        Nd4j.setDefaultDataTypes(globalDtype, globalDtype);
        for (DataType networkDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
            for (int outputLayer = 0; outputLayer < 3; outputLayer++) {
                assertEquals(globalDtype, Nd4j.dataType());
                assertEquals(globalDtype, Nd4j.defaultFloatingPointType());

                String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype + ", outputLayer=" + outputLayer;

                Layer ol;
                Layer secondLast;
                switch (outputLayer) {
                    case 0:
                        ol = new RnnOutputLayer.Builder().nOut(5).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build();
                        secondLast = new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build();
                        break;
                    case 1:
                        ol = new RnnLossLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build();
                        secondLast = new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build();
                        break;
                    case 2:
                        ol = new OutputLayer.Builder().nOut(5).build();
                        secondLast = new LastTimeStep(new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build());
                        break;
                    default:
                        throw new RuntimeException();
                }

                MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                        .dataType(networkDtype)
                        .convolutionMode(ConvolutionMode.Same)
                        .updater(new Adam(1e-2))
                        .list()
                        .layer(new LSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build())
                        .layer(new GravesLSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build())
                        .layer(new DenseLayer.Builder().nOut(5).build())
                        .layer(new GravesBidirectionalLSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build())
                        .layer(new Bidirectional(new LSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()))
                        .layer(new TimeDistributed(new DenseLayer.Builder().nIn(10).nOut(5).activation(Activation.TANH).build()))
                        .layer(new SimpleRnn.Builder().nIn(5).nOut(5).build())
                        .layer(new MaskZeroLayer.Builder().underlying(new SimpleRnn.Builder().nIn(5).nOut(5).build()).maskValue(0.0).build())
                        .layer(secondLast)
                        .layer(ol)
                        .build();

                MultiLayerNetwork net = new MultiLayerNetwork(conf);
                net.init();

                net.initGradientsView();
                assertEquals(msg, networkDtype, net.params().dataType());
                assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType());
                assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType());

                INDArray in = Nd4j.rand(networkDtype, 2, 5, 2);
                INDArray label;
                if (outputLayer == 2) {
                    label = TestUtils.randomOneHot(2, 5).castTo(networkDtype);
                } else {
                    label = TestUtils.randomOneHotTimeSeries(2, 5, 2).castTo(networkDtype);
                }


                INDArray out = net.output(in);
                assertEquals(msg, networkDtype, out.dataType());
                List<INDArray> ff = net.feedForward(in);
                for (int i = 0; i < ff.size(); i++) {
                    assertEquals(msg, networkDtype, ff.get(i).dataType());
                }

                net.setInput(in);
                net.setLabels(label);
                net.computeGradientAndScore();

                net.fit(new DataSet(in, label, Nd4j.ones(networkDtype, 2, 2), outputLayer == 2 ? null : Nd4j.ones(networkDtype, 2, 2)));

                logUsedClasses(net);

                //Now, test mismatched dtypes for input/labels:
                for (DataType inputLabelDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
                    INDArray in2 = in.castTo(inputLabelDtype);
                    INDArray label2 = label.castTo(inputLabelDtype);
                    net.output(in2);
                    net.setInput(in2);
                    net.setLabels(label2);
                    net.computeGradientAndScore();

                    net.fit(new DataSet(in2, label2));
                }
            }
        }
    }
}
 
Example 12
Source File: TestComputationGraphNetwork.java    From deeplearning4j with Apache License 2.0
@Test
public void testBackwardIrisBasic() {
    ComputationGraphConfiguration configuration = getIrisGraphConfiguration();
    ComputationGraph graph = new ComputationGraph(configuration);
    graph.init();

    MultiLayerConfiguration mlc = getIrisMLNConfiguration();
    MultiLayerNetwork net = new MultiLayerNetwork(mlc);
    net.init();

    DataSetIterator iris = new IrisDataSetIterator(150, 150);
    DataSet ds = iris.next();

    //Now: set parameters of both networks to be identical. Then feedforward, and check we get the same outputs
    Nd4j.getRandom().setSeed(12345);
    int nParams = (4 * 5 + 5) + (5 * 3 + 3);
    INDArray params = Nd4j.rand(1, nParams);
    graph.setParams(params.dup());
    net.setParams(params.dup());

    INDArray input = ds.getFeatures();
    INDArray labels = ds.getLabels();
    graph.setInput(0, input.dup());
    graph.setLabel(0, labels.dup());

    net.setInput(input.dup());
    net.setLabels(labels.dup());

    //Compute gradients
    net.computeGradientAndScore();
    Pair<Gradient, Double> netGradScore = net.gradientAndScore();

    graph.computeGradientAndScore();
    Pair<Gradient, Double> graphGradScore = graph.gradientAndScore();

    assertEquals(netGradScore.getSecond(), graphGradScore.getSecond(), 1e-3);

    //Compare gradients
    Gradient netGrad = netGradScore.getFirst();
    Gradient graphGrad = graphGradScore.getFirst();

    assertNotNull(graphGrad);
    assertEquals(netGrad.gradientForVariable().size(), graphGrad.gradientForVariable().size());

    assertEquals(netGrad.getGradientFor("0_W"), graphGrad.getGradientFor("firstLayer_W"));
    assertEquals(netGrad.getGradientFor("0_b"), graphGrad.getGradientFor("firstLayer_b"));
    assertEquals(netGrad.getGradientFor("1_W"), graphGrad.getGradientFor("outputLayer_W"));
    assertEquals(netGrad.getGradientFor("1_b"), graphGrad.getGradientFor("outputLayer_b"));
}
 
Example 13
Source File: BidirectionalTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testSerialization() throws Exception {

    for(WorkspaceMode wsm : WorkspaceMode.values()) {
        log.info("*** Starting workspace mode: " + wsm);

        Nd4j.getRandom().setSeed(12345);

        MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder()
                .activation(Activation.TANH)
                .weightInit(WeightInit.XAVIER)
                .trainingWorkspaceMode(wsm)
                .inferenceWorkspaceMode(wsm)
                .updater(new Adam())
                .list()
                .layer(new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()))
                .layer(new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()))
                .layer(new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE)
                        .nIn(10).nOut(10).dataFormat(rnnDataFormat).build())
                .build();

        MultiLayerNetwork net1 = new MultiLayerNetwork(conf1);
        net1.init();

        INDArray in;
        INDArray labels;

        long[] inshape = rnnDataFormat == NCW ? new long[]{3, 10, 5} : new long[]{3, 5, 10};

        in = Nd4j.rand(inshape);
        labels = Nd4j.rand(inshape);

        net1.fit(in, labels);

        byte[] bytes;
        try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
            ModelSerializer.writeModel(net1, baos, true);
            bytes = baos.toByteArray();
        }


        MultiLayerNetwork net2 = ModelSerializer.restoreMultiLayerNetwork(new ByteArrayInputStream(bytes), true);


        in = Nd4j.rand(inshape);
        labels = Nd4j.rand(inshape);

        INDArray out1 = net1.output(in);
        INDArray out2 = net2.output(in);

        assertEquals(out1, out2);

        net1.setInput(in);
        net2.setInput(in);
        net1.setLabels(labels);
        net2.setLabels(labels);

        net1.computeGradientAndScore();
        net2.computeGradientAndScore();

        assertEquals(net1.score(), net2.score(), 1e-6);
        assertEquals(net1.gradient().gradient(), net2.gradient().gradient());
    }
}
 
Example 14
Source File: DTypeTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testAttentionDTypes() {
    for (DataType globalDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
        Nd4j.setDefaultDataTypes(globalDtype, globalDtype);
        for (DataType networkDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
            assertEquals(globalDtype, Nd4j.dataType());
            assertEquals(globalDtype, Nd4j.defaultFloatingPointType());

            String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype;

            int mb = 3;
            int nIn = 3;
            int nOut = 5;
            int tsLength = 4;
            int layerSize = 8;
            int numQueries = 6;

            INDArray in = Nd4j.rand(networkDtype, new long[]{mb, nIn, tsLength});
            INDArray labels = TestUtils.randomOneHot(mb, nOut);

            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .dataType(networkDtype)
                    .activation(Activation.TANH)
                    .updater(new NoOp())
                    .weightInit(WeightInit.XAVIER)
                    .list()
                    .layer(new LSTM.Builder().nOut(layerSize).build())
                    .layer(new SelfAttentionLayer.Builder().nOut(8).nHeads(2).projectInput(true).build())
                    .layer(new LearnedSelfAttentionLayer.Builder().nOut(8).nHeads(2).nQueries(numQueries).projectInput(true).build())
                    .layer(new RecurrentAttentionLayer.Builder().nIn(layerSize).nOut(layerSize).nHeads(1).projectInput(false).hasBias(false).build())
                    .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build())
                    .layer(new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX)
                            .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                    .setInputType(InputType.recurrent(nIn))
                    .build();

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            INDArray out = net.output(in);
            assertEquals(msg, networkDtype, out.dataType());
            List<INDArray> ff = net.feedForward(in);
            for (int i = 0; i < ff.size(); i++) {
                String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName());
                assertEquals(s, networkDtype, ff.get(i).dataType());
            }

            net.setInput(in);
            net.setLabels(labels);
            net.computeGradientAndScore();

            net.fit(new DataSet(in, labels));

            logUsedClasses(net);

            //Now, test mismatched dtypes for input/labels:
            for (DataType inputLabelDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
                INDArray in2 = in.castTo(inputLabelDtype);
                INDArray label2 = labels.castTo(inputLabelDtype);
                net.output(in2);
                net.setInput(in2);
                net.setLabels(label2);
                net.computeGradientAndScore();

                net.fit(new DataSet(in2, label2));
            }
        }
    }
}
 
Example 15
Source File: TestCustomUpdater.java    From deeplearning4j with Apache License 2.0
@Test
public void testCustomUpdater() {

    //Create a simple custom updater, equivalent to SGD updater

    double lr = 0.03;

    Nd4j.getRandom().setSeed(12345);
    MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder().seed(12345)
                    .activation(Activation.TANH).updater(new CustomIUpdater(lr)) //Specify custom IUpdater
                    .list().layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build())
                    .layer(1, new OutputLayer.Builder().nIn(10).nOut(10)
                                    .lossFunction(LossFunctions.LossFunction.MSE).build())
                    .build();

    Nd4j.getRandom().setSeed(12345);
    MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345)
                    .activation(Activation.TANH).updater(new Sgd(lr)).list()
                    .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(1, new OutputLayer.Builder()
                                    .nIn(10).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build())
                    .build();

    //First: Check updater config
    assertTrue(((BaseLayer) conf1.getConf(0).getLayer()).getIUpdater() instanceof CustomIUpdater);
    assertTrue(((BaseLayer) conf1.getConf(1).getLayer()).getIUpdater() instanceof CustomIUpdater);
    assertTrue(((BaseLayer) conf2.getConf(0).getLayer()).getIUpdater() instanceof Sgd);
    assertTrue(((BaseLayer) conf2.getConf(1).getLayer()).getIUpdater() instanceof Sgd);

    CustomIUpdater u0_0 = (CustomIUpdater) ((BaseLayer) conf1.getConf(0).getLayer()).getIUpdater();
    CustomIUpdater u0_1 = (CustomIUpdater) ((BaseLayer) conf1.getConf(1).getLayer()).getIUpdater();
    assertEquals(lr, u0_0.getLearningRate(), 1e-6);
    assertEquals(lr, u0_1.getLearningRate(), 1e-6);

    Sgd u1_0 = (Sgd) ((BaseLayer) conf2.getConf(0).getLayer()).getIUpdater();
    Sgd u1_1 = (Sgd) ((BaseLayer) conf2.getConf(1).getLayer()).getIUpdater();
    assertEquals(lr, u1_0.getLearningRate(), 1e-6);
    assertEquals(lr, u1_1.getLearningRate(), 1e-6);


    //Second: check JSON
    String asJson = conf1.toJson();
    MultiLayerConfiguration fromJson = MultiLayerConfiguration.fromJson(asJson);
    assertEquals(conf1, fromJson);

    Nd4j.getRandom().setSeed(12345);
    MultiLayerNetwork net1 = new MultiLayerNetwork(conf1);
    net1.init();

    Nd4j.getRandom().setSeed(12345);
    MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
    net2.init();


    //Third: check gradients are equal
    INDArray in = Nd4j.rand(5, 10);
    INDArray labels = Nd4j.rand(5, 10);

    net1.setInput(in);
    net2.setInput(in);

    net1.setLabels(labels);
    net2.setLabels(labels);

    net1.computeGradientAndScore();
    net2.computeGradientAndScore();

    assertEquals(net1.getFlattenedGradients(), net2.getFlattenedGradients());
}
 
Example 16
Source File: TestMultiModelGradientApplication.java    From deeplearning4j with Apache License 2.0
@Test
public void testGradientApplyMultiLayerNetwork() {
    int minibatch = 7;
    int nIn = 10;
    int nOut = 10;

    for (boolean regularization : new boolean[] {false, true}) {
        for (IUpdater u : new IUpdater[] {new Sgd(0.1), new Nesterovs(0.1), new Adam(0.1)}) {

            MultiLayerConfiguration conf =
                            new NeuralNetConfiguration.Builder().seed(12345).activation(Activation.TANH)
                                            .weightInit(WeightInit.XAVIER).updater(u)
                                            .l1(regularization ? 0.2 : 0.0)
                                            .l2(regularization ? 0.3 : 0.0).list()
                                            .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(10).build())
                                            .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(2,
                                                            new OutputLayer.Builder(
                                                                            LossFunctions.LossFunction.MCXENT)
                                                                                            .activation(Activation.SOFTMAX)
                                                                                            .nIn(10).nOut(nOut)
                                                                                            .build())
                                            .build();


            Nd4j.getRandom().setSeed(12345);
            MultiLayerNetwork net1GradCalc = new MultiLayerNetwork(conf);
            net1GradCalc.init();

            Nd4j.getRandom().setSeed(12345);
            MultiLayerNetwork net2GradUpd = new MultiLayerNetwork(conf.clone());
            net2GradUpd.init();

            assertEquals(net1GradCalc.params(), net2GradUpd.params());

            INDArray f = Nd4j.rand(minibatch, nIn);
            INDArray l = Nd4j.create(minibatch, nOut);
            for (int i = 0; i < minibatch; i++) {
                l.putScalar(i, i % nOut, 1.0);
            }
            net1GradCalc.setInput(f);
            net1GradCalc.setLabels(l);

            net2GradUpd.setInput(f);
            net2GradUpd.setLabels(l);

            //Calculate gradient in first net, update and apply it in the second
            //Also: calculate gradient in the second net, just to be sure it isn't modified while doing updating on
            // the other net's gradient
            net1GradCalc.computeGradientAndScore();
            net2GradUpd.computeGradientAndScore();

            Gradient g = net1GradCalc.gradient();
            INDArray gBefore = g.gradient().dup(); //Net 1 gradient should be modified
            INDArray net2GradBefore = net2GradUpd.gradient().gradient().dup(); //But net 2 gradient should not be
            net2GradUpd.getUpdater().update(net2GradUpd, g, 0, 0, minibatch, LayerWorkspaceMgr.noWorkspaces());
            INDArray gAfter = g.gradient().dup();
            INDArray net2GradAfter = net2GradUpd.gradient().gradient().dup();

            assertNotEquals(gBefore, gAfter); //Net 1 gradient should be modified
            assertEquals(net2GradBefore, net2GradAfter); //But net 2 gradient should not be


            //Also: if we apply the gradient using a subi op, we should get the same final params as if we did a fit op
            // on the original network
            net2GradUpd.params().subi(g.gradient());

            net1GradCalc.fit(f, l);
            assertEquals(net1GradCalc.params(), net2GradUpd.params());


            //=============================
            if (!(u instanceof Sgd)) {
                net2GradUpd.getUpdater().getStateViewArray().assign(net1GradCalc.getUpdater().getStateViewArray());
            }
            assertEquals(net1GradCalc.params(), net2GradUpd.params());
            assertEquals(net1GradCalc.getUpdater().getStateViewArray(),
                            net2GradUpd.getUpdater().getStateViewArray());

            //Without the next 2 lines this fails, as net 1 is 1 iteration ahead
            net1GradCalc.getLayerWiseConfigurations().setIterationCount(0);
            net2GradUpd.getLayerWiseConfigurations().setIterationCount(0);

            for (int i = 0; i < 100; i++) {
                net1GradCalc.fit(f, l);
                net2GradUpd.fit(f, l);
                assertEquals(net1GradCalc.params(), net2GradUpd.params());
            }
        }
    }
}
 
Example 17
Source File: GlobalPoolingMaskingTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testMaskingCnnDim2_SingleExample() {
    //Test masking, where mask is along dimension 2

    int minibatch = 1;
    int depthIn = 2;
    int depthOut = 2;
    int nOut = 2;
    int height = 6;
    int width = 3;

    PoolingType[] poolingTypes =
                    new PoolingType[] {PoolingType.SUM, PoolingType.AVG, PoolingType.MAX, PoolingType.PNORM};

    for (PoolingType pt : poolingTypes) {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER)
                        .convolutionMode(ConvolutionMode.Same).seed(12345L).list()
                        .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(depthOut).kernelSize(2, width)
                                        .stride(1, width).activation(Activation.TANH).build())
                        .layer(1, new org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer.Builder().poolingType(pt)
                                        .build())
                        .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                        .activation(Activation.SOFTMAX).nIn(depthOut).nOut(nOut).build())
                        .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        INDArray inToBeMasked = Nd4j.rand(new int[] {minibatch, depthIn, height, width});

        //Shape for mask: [minibatch, 1, height, 1] - masking along dimension 2 (height)
        INDArray maskArray = Nd4j.create(new double[] {1, 1, 1, 1, 1, 0}, new int[]{1,1,height,1});

        //Multiply the input by the mask array, to ensure the 0s in the mask correspond to 0s in the input vector
        // as would be the case in practice...
        Nd4j.getExecutioner().exec(new BroadcastMulOp(inToBeMasked, maskArray, inToBeMasked, 0, 2));


        net.setLayerMaskArrays(maskArray, null);

        INDArray outMasked = net.output(inToBeMasked);
        net.clearLayerMaskArrays();

        int numSteps = height - 1;
        INDArray subset = inToBeMasked.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.all(),
                        NDArrayIndex.interval(0, numSteps), NDArrayIndex.all());
        assertArrayEquals(new long[] {1, depthIn, 5, width}, subset.shape());

        INDArray outSubset = net.output(subset);
        INDArray outMaskedSubset = outMasked.getRow(0);

        assertEquals(outSubset, outMaskedSubset);

        //Finally: check gradient calc for exceptions
        net.setLayerMaskArrays(maskArray, null);
        net.setInput(inToBeMasked);
        INDArray labels = Nd4j.create(new double[] {0, 1}, new long[]{1,2});
        net.setLabels(labels);

        net.computeGradientAndScore();
    }
}
 
Example 18
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientCNNL1L2MLN() {
        if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format...
            return;

        //Parameterized test, testing combinations of:
        // (a) activation function
        // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
        // (c) Loss function (with specified output activations)

        DataSet ds = new IrisDataSetIterator(150, 150).next();
        ds.normalizeZeroMeanZeroUnitVariance();
        INDArray input = ds.getFeatures();
        INDArray labels = ds.getLabels();

        //use l2vals[i] with l1vals[i]
        double[] l2vals = {0.4, 0.0, 0.4, 0.4};
        double[] l1vals = {0.0, 0.0, 0.5, 0.0};
        double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
        double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
        Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS};
        boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first

        LossFunctions.LossFunction[] lossFunctions =
                {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here

        for( int i=0; i<l2vals.length; i++ ){
            Activation afn = activFns[i];
            boolean doLearningFirst = characteristic[i];
            LossFunctions.LossFunction lf = lossFunctions[i];
            Activation outputActivation = outputActivations[i];
            double l2 = l2vals[i];
            double l1 = l1vals[i];

            MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                    .dataType(DataType.DOUBLE)
                    .l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i])
                    .optimizationAlgo(
                            OptimizationAlgorithm.CONJUGATE_GRADIENT)
                    .seed(12345L).list()
                    .layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6)
                            .weightInit(WeightInit.XAVIER).activation(afn)
                            .updater(new NoOp()).build())
                    .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
                            .weightInit(WeightInit.XAVIER).updater(new NoOp()).build())

                    .setInputType(InputType.convolutionalFlat(1, 4, 1));

            MultiLayerConfiguration conf = builder.build();

            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
            mln.init();
            String testName = new Object() {
            }.getClass().getEnclosingMethod().getName();

            if (doLearningFirst) {
                //Run a number of iterations of learning
                mln.setInput(ds.getFeatures());
                mln.setLabels(ds.getLabels());
                mln.computeGradientAndScore();
                double scoreBefore = mln.score();
                for (int j = 0; j < 10; j++)
                    mln.fit(ds);
                mln.computeGradientAndScore();
                double scoreAfter = mln.score();
                //Can't test in 'characteristic mode of operation' if not learning
                String msg = testName
                        + "- score did not (sufficiently) decrease during learning - activationFn="
                        + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                        + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
                        + ", scoreAfter=" + scoreAfter + ")";
                assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
            }

            if (PRINT_RESULTS) {
                System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf
                        + ", outputActivation=" + outputActivation + ", doLearningFirst="
                        + doLearningFirst);
//                for (int j = 0; j < mln.getnLayers(); j++)
//                    System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
            }

            boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

            assertTrue(gradOK);
            TestUtils.testModelSerialization(mln);
        }
    }
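Example 18 uses setInput()/setLabels()/computeGradientAndScore() only to read the score before and after a few fit() calls. When the gradient itself is not needed, MultiLayerNetwork.score(DataSet) yields an equivalent loss more compactly (equivalent for networks without dropout or other train-time stochasticity); a hedged sketch of that variant, reusing ds and mln from the test above:

double scoreBefore = mln.score(ds);   // forward pass and loss only; no gradient is exposed
for (int j = 0; j < 10; j++)
    mln.fit(ds);
double scoreAfter = mln.score(ds);
assertTrue("score should decrease after fitting", scoreAfter < 0.8 * scoreBefore);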
 
Example 19
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
    public void testGradientCNNMLN() {
        if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format...
            return;

        //Parameterized test, testing combinations of:
        // (a) activation function
        // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
        // (c) Loss function (with specified output activations)
        Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
        boolean[] characteristic = {false, true}; //If true: run some backprop steps first

        LossFunctions.LossFunction[] lossFunctions =
                {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here

        DataSet ds = new IrisDataSetIterator(150, 150).next();
        ds.normalizeZeroMeanZeroUnitVariance();
        INDArray input = ds.getFeatures();
        INDArray labels = ds.getLabels();

        for (Activation afn : activFns) {
            for (boolean doLearningFirst : characteristic) {
                for (int i = 0; i < lossFunctions.length; i++) {
                    LossFunctions.LossFunction lf = lossFunctions[i];
                    Activation outputActivation = outputActivations[i];

                    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                            .dataType(DataType.DOUBLE)
                            .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp())
                            .weightInit(WeightInit.XAVIER).seed(12345L).list()
                            .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn).build())
                            .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build())
                            .setInputType(InputType.convolutionalFlat(1, 4, 1));

                    MultiLayerConfiguration conf = builder.build();

                    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                    mln.init();
                    String name = new Object() {
                    }.getClass().getEnclosingMethod().getName();

                    if (doLearningFirst) {
                        //Run a number of iterations of learning
                        mln.setInput(ds.getFeatures());
                        mln.setLabels(ds.getLabels());
                        mln.computeGradientAndScore();
                        double scoreBefore = mln.score();
                        for (int j = 0; j < 10; j++)
                            mln.fit(ds);
                        mln.computeGradientAndScore();
                        double scoreAfter = mln.score();
                        //Can't test in 'characteristic mode of operation' if not learning
                        String msg = name + " - score did not (sufficiently) decrease during learning - activationFn="
                                + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
                                + ", scoreAfter=" + scoreAfter + ")";
                        assertTrue(msg, scoreAfter < 0.9 * scoreBefore);
                    }

                    if (PRINT_RESULTS) {
                        System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation="
                                + outputActivation + ", doLearningFirst=" + doLearningFirst);
//                        for (int j = 0; j < mln.getnLayers(); j++)
//                            System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                    }

                    boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                            DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                    assertTrue(gradOK);
                    TestUtils.testModelSerialization(mln);
                }
            }
        }
    }
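After computeGradientAndScore(), the gradients are also available per parameter, not only as the flattened view consumed by the gradient checker. A hedged sketch, assuming the usual "<layerIndex>_W" / "<layerIndex>_b" parameter-key convention (an assumption here, not something this test relies on); Gradient is org.deeplearning4j.nn.gradient.Gradient:

mln.setInput(input);
mln.setLabels(labels);
mln.computeGradientAndScore();
Gradient g = mln.gradient();                          // per-parameter gradient map
INDArray convWeightGrad = g.getGradientFor("0_W");    // gradient of layer 0 (convolution) weights
INDArray outputBiasGrad = g.getGradientFor("1_b");    // gradient of layer 1 (output layer) biases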
 
Example 20
Source File: DTypeTests.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
public void testCapsNetDtypes() {
    for (DataType globalDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
        Nd4j.setDefaultDataTypes(globalDtype, globalDtype);
        for (DataType networkDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
            assertEquals(globalDtype, Nd4j.dataType());
            assertEquals(globalDtype, Nd4j.defaultFloatingPointType());

            String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype;

            int primaryCapsDim = 2;
            int primaryCapsChannel = 8;
            int capsule = 5;
            int minibatchSize = 8;
            int routing = 1;
            int capsuleDim = 4;
            int height = 6;
            int width = 6;
            int inputDepth = 4;

            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .dataType(networkDtype)
                    .seed(123)
                    .updater(new NoOp())
                    .weightInit(new WeightInitDistribution(new UniformDistribution(-6, 6)))
                    .list()
                    .layer(new PrimaryCapsules.Builder(primaryCapsDim, primaryCapsChannel)
                            .kernelSize(3, 3)
                            .stride(2, 2)
                            .build())
                    .layer(new CapsuleLayer.Builder(capsule, capsuleDim, routing).build())
                    .layer(new CapsuleStrengthLayer.Builder().build())
                    .layer(new ActivationLayer.Builder(new ActivationSoftmax()).build())
                    .layer(new LossLayer.Builder(new LossNegativeLogLikelihood()).build())
                    .setInputType(InputType.convolutional(height, width, inputDepth))
                    .build();

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            INDArray in = Nd4j.rand(networkDtype, minibatchSize, inputDepth * height * width).mul(10)
                    .reshape(-1, inputDepth, height, width);
            INDArray label = Nd4j.zeros(networkDtype, minibatchSize, capsule);
            for (int i = 0; i < minibatchSize; i++) {
                label.putScalar(new int[]{i, i % capsule}, 1.0);
            }

            INDArray out = net.output(in);
            assertEquals(msg, networkDtype, out.dataType());
            List<INDArray> ff = net.feedForward(in);
            for (int i = 0; i < ff.size(); i++) {
                String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName());
                assertEquals(s, networkDtype, ff.get(i).dataType());
            }

            net.setInput(in);
            net.setLabels(label);
            net.computeGradientAndScore();

            net.fit(new DataSet(in, label));

            logUsedClasses(net);

            //Now, test mismatched dtypes for input/labels:
            for (DataType inputLabelDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
                INDArray in2 = in.castTo(inputLabelDtype);
                INDArray label2 = label.castTo(inputLabelDtype);
                net.output(in2);
                net.setInput(in2);
                net.setLabels(label2);
                net.computeGradientAndScore();

                net.fit(new DataSet(in2, label2));
            }
        }
    }
}
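A hedged companion to the dtype checks above: the effective network dtype can be read back from the flattened parameter view, and recent DL4J releases (an assumption here) also offer convertDataType to obtain a copy of the network in another dtype. Variables reuse net, in, label and networkDtype from the test above:

assertEquals(networkDtype, net.params().dataType());        // parameters carry the network dtype
MultiLayerNetwork netFloat = net.convertDataType(DataType.FLOAT);
assertEquals(DataType.FLOAT, netFloat.params().dataType());
netFloat.setInput(in.castTo(DataType.FLOAT));
netFloat.setLabels(label.castTo(DataType.FLOAT));
netFloat.computeGradientAndScore();                         // same gradient/score path in the new dtype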