Java Code Examples for org.deeplearning4j.nn.multilayer.MultiLayerNetwork#setInput()

The following examples show how to use org.deeplearning4j.nn.multilayer.MultiLayerNetwork#setInput(). Each example is drawn from an open-source project; the source file and license are noted above each snippet.
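Most of the examples below follow the same basic pattern: stage the features with setInput(), stage the labels with setLabels(), call computeGradientAndScore(), and then inspect the score or the gradients. The snippet below is a minimal, self-contained sketch of that pattern; the class name, layer sizes, and random data are illustrative only and are not taken from any of the projects below.

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class SetInputSketch {
    public static void main(String[] args) {
        // Small two-layer network; sizes are arbitrary, chosen for illustration only
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .list()
                .layer(new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH).build())
                .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MSE)
                        .nIn(10).nOut(10).activation(Activation.IDENTITY).build())
                .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        INDArray features = Nd4j.rand(5, 10);
        INDArray labels = Nd4j.rand(5, 10);

        // setInput() stages the features on the network without calling fit();
        // together with setLabels() and computeGradientAndScore() this lets us
        // read the score and gradients directly, as the examples below do.
        net.setInput(features);
        net.setLabels(labels);
        net.computeGradientAndScore();

        double score = net.score();
        INDArray flatGradients = net.getFlattenedGradients();
        System.out.println("score=" + score + ", gradient length=" + flatGradients.length());
    }
}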
Example 1
Source File: PLNetDyadRanker.java    From AILibs with GNU Affero General Public License v3.0
/**
 * Computes the gradient of the PLNet's error function for a given instance. The
 * returned gradient is already scaled by the updater. The update procedure is
 * based on algorithm 2 in [1].
 *
 * @param instance
 *            The instance to compute the scaled gradient for.
 * @return The gradient for the given instance, multiplied by the updater's
 *         learning rate.
 */
private INDArray computeScaledGradient(final IDyadRankingInstance instance) {
	// init weight update vector
	INDArray dyadMatrix;
	List<INDArray> dyadList = new ArrayList<>(instance.getNumAttributes());
	for (IDyad dyad : instance) {
		INDArray dyadVector = this.dyadToVector(dyad);
		dyadList.add(dyadVector);
	}
	dyadMatrix = this.dyadRankingToMatrix(instance);
	List<INDArray> activations = this.plNet.feedForward(dyadMatrix);
	INDArray output = activations.get(activations.size() - 1);
	output = output.transpose();
	INDArray deltaW = Nd4j.zeros(this.plNet.params().length());
	Gradient deltaWk = null;
	MultiLayerNetwork plNetClone = this.plNet.clone();
	for (int k = 0; k < instance.getNumAttributes(); k++) {
		// compute derivative of loss w.r.t. k
		plNetClone.setInput(dyadList.get(k));
		plNetClone.feedForward(true, false);
		INDArray lossGradient = PLNetLoss.computeLossGradient(output, k);
		// compute backprop gradient for weight updates w.r.t. k
		Pair<Gradient, INDArray> p = plNetClone.backpropGradient(lossGradient, null);
		deltaWk = p.getFirst();
		this.plNet.getUpdater().update(this.plNet, deltaWk, this.iteration, this.epoch, 1, LayerWorkspaceMgr.noWorkspaces());
		deltaW.addi(deltaWk.gradient());
	}

	return deltaW;
}
 
Example 2
Source File: BatchNormalizationTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testDBNBNMultiLayer() throws Exception {
    DataSetIterator iter = new MnistDataSetIterator(2, 2);
    DataSet next = iter.next();

    // Run with separate activation layer
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123)
            .list()
            .layer(0, new DenseLayer.Builder().nIn(28 * 28).nOut(10).weightInit(WeightInit.XAVIER)
                    .activation(Activation.RELU).build())
            .layer(1, new BatchNormalization.Builder().nOut(10).build()).layer(2,
                    new ActivationLayer.Builder()
                            .activation(Activation.RELU).build())
            .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(10).nOut(10)
                    .build())
            .build();

    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();

    network.setInput(next.getFeatures());
    INDArray activationsActual = network.output(next.getFeatures());
    assertEquals(10, activationsActual.shape()[1], 1e-2);

    network.fit(next);
    INDArray actualGammaParam = network.getLayer(1).getParam(BatchNormalizationParamInitializer.GAMMA);
    INDArray actualBetaParam = network.getLayer(1).getParam(BatchNormalizationParamInitializer.BETA);
    assertTrue(actualGammaParam != null);
    assertTrue(actualBetaParam != null);
}
 
Example 3
Source File: RegressionTest100a.java    From deeplearning4j with Apache License 2.0
@Test
public void testUpsampling2d() throws Exception {

    File f = Resources.asFile("regression_testing/100a/upsampling/net.bin");
    MultiLayerNetwork net = MultiLayerNetwork.load(f, true);

    INDArray in;
    File fIn = Resources.asFile("regression_testing/100a/upsampling/in.bin");
    try(DataInputStream dis = new DataInputStream(new FileInputStream(fIn))){
        in = Nd4j.read(dis);
    }

    INDArray label;
    File fLabels = Resources.asFile("regression_testing/100a/upsampling/labels.bin");
    try(DataInputStream dis = new DataInputStream(new FileInputStream(fLabels))){
        label = Nd4j.read(dis);
    }

    INDArray outExp;
    File fOutExp = Resources.asFile("regression_testing/100a/upsampling/out.bin");
    try(DataInputStream dis = new DataInputStream(new FileInputStream(fOutExp))){
        outExp = Nd4j.read(dis);
    }

    INDArray gradExp;
    File fGradExp = Resources.asFile("regression_testing/100a/upsampling/gradient.bin");
    try(DataInputStream dis = new DataInputStream(new FileInputStream(fGradExp))){
        gradExp = Nd4j.read(dis);
    }

    INDArray out = net.output(in, false);
    assertEquals(outExp, out);

    net.setInput(in);
    net.setLabels(label);
    net.computeGradientAndScore();

    INDArray grad = net.getFlattenedGradients();
    assertEquals(gradExp, grad);
}
 
Example 4
Source File: DTypeTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testAttentionDTypes() {
    for (DataType globalDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
        Nd4j.setDefaultDataTypes(globalDtype, globalDtype);
        for (DataType networkDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
            assertEquals(globalDtype, Nd4j.dataType());
            assertEquals(globalDtype, Nd4j.defaultFloatingPointType());

            String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype;

            int mb = 3;
            int nIn = 3;
            int nOut = 5;
            int tsLength = 4;
            int layerSize = 8;
            int numQueries = 6;

            INDArray in = Nd4j.rand(networkDtype, new long[]{mb, nIn, tsLength});
            INDArray labels = TestUtils.randomOneHot(mb, nOut);

            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .dataType(networkDtype)
                    .activation(Activation.TANH)
                    .updater(new NoOp())
                    .weightInit(WeightInit.XAVIER)
                    .list()
                    .layer(new LSTM.Builder().nOut(layerSize).build())
                    .layer(new SelfAttentionLayer.Builder().nOut(8).nHeads(2).projectInput(true).build())
                    .layer(new LearnedSelfAttentionLayer.Builder().nOut(8).nHeads(2).nQueries(numQueries).projectInput(true).build())
                    .layer(new RecurrentAttentionLayer.Builder().nIn(layerSize).nOut(layerSize).nHeads(1).projectInput(false).hasBias(false).build())
                    .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build())
                    .layer(new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX)
                            .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                    .setInputType(InputType.recurrent(nIn))
                    .build();

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            INDArray out = net.output(in);
            assertEquals(msg, networkDtype, out.dataType());
            List<INDArray> ff = net.feedForward(in);
            for (int i = 0; i < ff.size(); i++) {
                String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName());
                assertEquals(s, networkDtype, ff.get(i).dataType());
            }

            net.setInput(in);
            net.setLabels(labels);
            net.computeGradientAndScore();

            net.fit(new DataSet(in, labels));

            logUsedClasses(net);

            //Now, test mismatched dtypes for input/labels:
            for (DataType inputLabelDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
                INDArray in2 = in.castTo(inputLabelDtype);
                INDArray label2 = labels.castTo(inputLabelDtype);
                net.output(in2);
                net.setInput(in2);
                net.setLabels(label2);
                net.computeGradientAndScore();

                net.fit(new DataSet(in2, label2));
            }
        }
    }
}
 
Example 5
Source File: DTypeTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testCapsNetDtypes() {
    for (DataType globalDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
        Nd4j.setDefaultDataTypes(globalDtype, globalDtype);
        for (DataType networkDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
            assertEquals(globalDtype, Nd4j.dataType());
            assertEquals(globalDtype, Nd4j.defaultFloatingPointType());

            String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype;

            int primaryCapsDim = 2;
            int primarpCapsChannel = 8;
            int capsule = 5;
            int minibatchSize = 8;
            int routing = 1;
            int capsuleDim = 4;
            int height = 6;
            int width = 6;
            int inputDepth = 4;

            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .dataType(networkDtype)
                    .seed(123)
                    .updater(new NoOp())
                    .weightInit(new WeightInitDistribution(new UniformDistribution(-6, 6)))
                    .list()
                    .layer(new PrimaryCapsules.Builder(primaryCapsDim, primarpCapsChannel)
                            .kernelSize(3, 3)
                            .stride(2, 2)
                            .build())
                    .layer(new CapsuleLayer.Builder(capsule, capsuleDim, routing).build())
                    .layer(new CapsuleStrengthLayer.Builder().build())
                    .layer(new ActivationLayer.Builder(new ActivationSoftmax()).build())
                    .layer(new LossLayer.Builder(new LossNegativeLogLikelihood()).build())
                    .setInputType(InputType.convolutional(height, width, inputDepth))
                    .build();

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            INDArray in = Nd4j.rand(networkDtype, minibatchSize, inputDepth * height * width).mul(10)
                    .reshape(-1, inputDepth, height, width);
            INDArray label = Nd4j.zeros(networkDtype, minibatchSize, capsule);
            for (int i = 0; i < minibatchSize; i++) {
                label.putScalar(new int[]{i, i % capsule}, 1.0);
            }

            INDArray out = net.output(in);
            assertEquals(msg, networkDtype, out.dataType());
            List<INDArray> ff = net.feedForward(in);
            for (int i = 0; i < ff.size(); i++) {
                String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName());
                assertEquals(s, networkDtype, ff.get(i).dataType());
            }

            net.setInput(in);
            net.setLabels(label);
            net.computeGradientAndScore();

            net.fit(new DataSet(in, label));

            logUsedClasses(net);

            //Now, test mismatched dtypes for input/labels:
            for (DataType inputLabelDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
                INDArray in2 = in.castTo(inputLabelDtype);
                INDArray label2 = label.castTo(inputLabelDtype);
                net.output(in2);
                net.setInput(in2);
                net.setLabels(label2);
                net.computeGradientAndScore();

                net.fit(new DataSet(in2, label2));
            }
        }
    }
}
 
Example 6
Source File: TestSameDiffOutput.java    From deeplearning4j with Apache License 2.0
@Test
public void testMSEOutputLayer(){       //Failing 2019/04/17 - https://github.com/deeplearning4j/deeplearning4j/issues/7560
    Nd4j.getRandom().setSeed(12345);

    for(Activation a : new Activation[]{Activation.IDENTITY, Activation.TANH, Activation.SOFTMAX}) {
        log.info("Starting test: " + a);

        MultiLayerConfiguration confSD = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .updater(new Adam(0.01))
                .list()
                .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build())
                .layer(new SameDiffMSEOutputLayer(5, 5, a, WeightInit.XAVIER))
                .build();

        MultiLayerConfiguration confStd = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .updater(new Adam(0.01))
                .list()
                .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build())
                .layer(new OutputLayer.Builder().nIn(5).nOut(5).activation(a).lossFunction(LossFunctions.LossFunction.MSE).build())
                .build();

        MultiLayerNetwork netSD = new MultiLayerNetwork(confSD);
        netSD.init();

        MultiLayerNetwork netStd = new MultiLayerNetwork(confStd);
        netStd.init();

        netSD.params().assign(netStd.params());

        assertEquals(netStd.paramTable(), netSD.paramTable());

        int minibatch = 2;
        INDArray in = Nd4j.rand(minibatch, 5);
        INDArray label = Nd4j.rand(minibatch, 5);

        INDArray outSD = netSD.output(in);
        INDArray outStd = netStd.output(in);
        assertEquals(outStd, outSD);

        DataSet ds = new DataSet(in, label);
        double scoreSD = netSD.score(ds);
        double scoreStd = netStd.score(ds);
        assertEquals(scoreStd, scoreSD, 1e-6);

        netSD.setInput(in);
        netSD.setLabels(label);

        netStd.setInput(in);
        netStd.setLabels(label);

        //System.out.println(((SameDiffOutputLayer) netSD.getLayer(1)).sameDiff.summary());

        netSD.computeGradientAndScore();
        netStd.computeGradientAndScore();

        assertEquals(netStd.getFlattenedGradients(), netSD.getFlattenedGradients());

        for (int i = 0; i < 3; i++) {
            netSD.fit(ds);
            netStd.fit(ds);
            String s = String.valueOf(i);
            assertEquals(s, netStd.params(), netSD.params());
            assertEquals(s, netStd.getFlattenedGradients(), netSD.getFlattenedGradients());
        }

        //Test fit before output:
        MultiLayerNetwork net = new MultiLayerNetwork(confSD.clone());
        net.init();
        net.fit(ds);

        //Sanity check on different minibatch sizes:
        INDArray newIn = Nd4j.vstack(in, in);
        INDArray outMbsd = netSD.output(newIn);
        INDArray outMb = netStd.output(newIn);
        assertEquals(outMb, outMbsd);
    }
}
 
Example 7
Source File: GlobalPoolingMaskingTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testMaskingCnnDim3_SingleExample() {
    //Test masking, where mask is along dimension 3

    int minibatch = 1;
    int depthIn = 2;
    int depthOut = 2;
    int nOut = 2;
    int height = 3;
    int width = 6;

    PoolingType[] poolingTypes =
                    new PoolingType[] {PoolingType.SUM, PoolingType.AVG, PoolingType.MAX, PoolingType.PNORM};

    for (PoolingType pt : poolingTypes) {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER)
                        .convolutionMode(ConvolutionMode.Same).seed(12345L).list()
                        .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(depthOut).kernelSize(height, 2)
                                        .stride(height, 1).activation(Activation.TANH).build())
                        .layer(1, new org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer.Builder().poolingType(pt)
                                        .build())
                        .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                        .activation(Activation.SOFTMAX).nIn(depthOut).nOut(nOut).build())
                        .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        INDArray inToBeMasked = Nd4j.rand(new int[] {minibatch, depthIn, height, width});

        //Shape for mask: [minibatch, 1, 1, width]
        INDArray maskArray = Nd4j.create(new double[] {1, 1, 1, 1, 1, 0}, new int[]{1,1,1,width});

        //Multiply the input by the mask array, to ensure the 0s in the mask correspond to 0s in the input vector
        // as would be the case in practice...
        Nd4j.getExecutioner().exec(new BroadcastMulOp(inToBeMasked, maskArray, inToBeMasked, 0, 3));


        net.setLayerMaskArrays(maskArray, null);

        INDArray outMasked = net.output(inToBeMasked);
        net.clearLayerMaskArrays();

        int numSteps = width - 1;
        INDArray subset = inToBeMasked.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.all(),
                        NDArrayIndex.all(), NDArrayIndex.interval(0, numSteps));
        assertArrayEquals(new long[] {1, depthIn, height, 5}, subset.shape());

        INDArray outSubset = net.output(subset);
        INDArray outMaskedSubset = outMasked.getRow(0);

        assertEquals(outSubset, outMaskedSubset);

        //Finally: check gradient calc for exceptions
        net.setLayerMaskArrays(maskArray, null);
        net.setInput(inToBeMasked);
        INDArray labels = Nd4j.create(new double[] {0, 1}, new long[]{1,2});
        net.setLabels(labels);

        net.computeGradientAndScore();
    }
}
 
Example 8
Source File: DropoutLayerTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testDropoutLayerWithConvMnist() throws Exception {
    Nd4j.setDefaultDataTypes(DataType.DOUBLE, DataType.DOUBLE); //Set to double datatype - MKL-DNN not used for CPU (otherwise different strides due to Dl4J impl permutes)
    DataSetIterator iter = new MnistDataSetIterator(2, 2);
    DataSet next = iter.next();

    // Run without separate activation layer
    Nd4j.getRandom().setSeed(12345);
    MultiLayerConfiguration confIntegrated = new NeuralNetConfiguration.Builder().seed(123)
                    .list().layer(0,
                                    new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20)
                                                    .activation(Activation.TANH).weightInit(WeightInit.XAVIER)
                                                    .build())
                    .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).dropOut(0.5)
                                    .nOut(10).build())
                    .setInputType(InputType.convolutionalFlat(28, 28, 1)).build();

    // Run with separate activation layer
    Nd4j.getRandom().setSeed(12345);

    //Manually configure preprocessors
    //This is necessary, otherwise CnnToFeedForwardPreprocessor will be in different locations
    //i.e., dropout on 4d activations in latter, and dropout on 2d activations in former
    Map<Integer, InputPreProcessor> preProcessorMap = new HashMap<>();
    preProcessorMap.put(1, new CnnToFeedForwardPreProcessor(13, 13, 20));

    MultiLayerConfiguration confSeparate = new NeuralNetConfiguration.Builder().seed(123).list()
                    .layer(0, new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20)
                                    .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build())
                    .layer(1, new DropoutLayer.Builder(0.5).build())
                    .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nOut(10).build())
                    .inputPreProcessors(preProcessorMap)
                    .setInputType(InputType.convolutionalFlat(28, 28, 1)).build();


    Nd4j.getRandom().setSeed(12345);
    MultiLayerNetwork netIntegrated = new MultiLayerNetwork(confIntegrated);
    netIntegrated.init();

    Nd4j.getRandom().setSeed(12345);
    MultiLayerNetwork netSeparate = new MultiLayerNetwork(confSeparate);
    netSeparate.init();

    assertEquals(netIntegrated.params(), netSeparate.params());

    Nd4j.getRandom().setSeed(12345);
    netIntegrated.fit(next);

    Nd4j.getRandom().setSeed(12345);
    netSeparate.fit(next);

    assertEquals(netIntegrated.params(), netSeparate.params());

    // check parameters
    assertEquals(netIntegrated.getLayer(0).getParam("W"), netSeparate.getLayer(0).getParam("W"));
    assertEquals(netIntegrated.getLayer(0).getParam("b"), netSeparate.getLayer(0).getParam("b"));
    assertEquals(netIntegrated.getLayer(1).getParam("W"), netSeparate.getLayer(2).getParam("W"));
    assertEquals(netIntegrated.getLayer(1).getParam("b"), netSeparate.getLayer(2).getParam("b"));

    // check activations
    netIntegrated.setInput(next.getFeatures().dup());
    netSeparate.setInput(next.getFeatures().dup());

    Nd4j.getRandom().setSeed(12345);
    List<INDArray> actTrainIntegrated = netIntegrated.feedForward(true);
    Nd4j.getRandom().setSeed(12345);
    List<INDArray> actTrainSeparate = netSeparate.feedForward(true);
    assertEquals(actTrainIntegrated.get(1), actTrainSeparate.get(1));
    assertEquals(actTrainIntegrated.get(2), actTrainSeparate.get(3));

    netIntegrated.setInput(next.getFeatures().dup());
    netSeparate.setInput(next.getFeatures().dup());
    Nd4j.getRandom().setSeed(12345);
    List<INDArray> actTestIntegrated = netIntegrated.feedForward(false);
    Nd4j.getRandom().setSeed(12345);
    List<INDArray> actTestSeparate = netSeparate.feedForward(false);
    assertEquals(actTestIntegrated.get(1), actTestSeparate.get(1));
    assertEquals(actTestIntegrated.get(2), actTestSeparate.get(3));
}
 
Example 9
Source File: GlobalPoolingMaskingTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testMaskingCnnDim2_SingleExample() {
    //Test masking, where mask is along dimension 2

    int minibatch = 1;
    int depthIn = 2;
    int depthOut = 2;
    int nOut = 2;
    int height = 6;
    int width = 3;

    PoolingType[] poolingTypes =
                    new PoolingType[] {PoolingType.SUM, PoolingType.AVG, PoolingType.MAX, PoolingType.PNORM};

    for (PoolingType pt : poolingTypes) {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER)
                        .convolutionMode(ConvolutionMode.Same).seed(12345L).list()
                        .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(depthOut).kernelSize(2, width)
                                        .stride(1, width).activation(Activation.TANH).build())
                        .layer(1, new org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer.Builder().poolingType(pt)
                                        .build())
                        .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                        .activation(Activation.SOFTMAX).nIn(depthOut).nOut(nOut).build())
                        .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        INDArray inToBeMasked = Nd4j.rand(new int[] {minibatch, depthIn, height, width});

        //Shape for mask: [minibatch, 1, height, 1]
        INDArray maskArray = Nd4j.create(new double[] {1, 1, 1, 1, 1, 0}, new int[]{1,1,height,1});

        //Multiply the input by the mask array, to ensure the 0s in the mask correspond to 0s in the input vector
        // as would be the case in practice...
        Nd4j.getExecutioner().exec(new BroadcastMulOp(inToBeMasked, maskArray, inToBeMasked, 0, 2));


        net.setLayerMaskArrays(maskArray, null);

        INDArray outMasked = net.output(inToBeMasked);
        net.clearLayerMaskArrays();

        int numSteps = height - 1;
        INDArray subset = inToBeMasked.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.all(),
                        NDArrayIndex.interval(0, numSteps), NDArrayIndex.all());
        assertArrayEquals(new long[] {1, depthIn, 5, width}, subset.shape());

        INDArray outSubset = net.output(subset);
        INDArray outMaskedSubset = outMasked.getRow(0);

        assertEquals(outSubset, outMaskedSubset);

        //Finally: check gradient calc for exceptions
        net.setLayerMaskArrays(maskArray, null);
        net.setInput(inToBeMasked);
        INDArray labels = Nd4j.create(new double[] {0, 1}, new long[]{1,2});
        net.setLabels(labels);

        net.computeGradientAndScore();
    }
}
 
Example 10
Source File: ValidateCudnnLSTM.java    From deeplearning4j with Apache License 2.0
@Test
public void validateImplMultiLayer() throws Exception {

    Nd4j.getRandom().setSeed(12345);
    int minibatch = 10;
    int inputSize = 3;
    int lstmLayerSize = 4;
    int timeSeriesLength = 3;
    int nOut = 2;
    INDArray input = Nd4j.rand(new int[] {minibatch, inputSize, timeSeriesLength});
    INDArray labels = Nd4j.zeros(minibatch, nOut, timeSeriesLength);
    Random r = new Random(12345);
    for (int i = 0; i < minibatch; i++) {
        for (int j = 0; j < timeSeriesLength; j++) {
            labels.putScalar(i, r.nextInt(nOut), j, 1.0);
        }
    }

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp())
                    .dataType(DataType.DOUBLE)
                    .inferenceWorkspaceMode(WorkspaceMode.NONE).trainingWorkspaceMode(WorkspaceMode.NONE)
                    .seed(12345L)
                    .dist(new NormalDistribution(0, 2)).list()
                    .layer(0, new LSTM.Builder().nIn(input.size(1)).nOut(lstmLayerSize)
                                    .gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build())
                    .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize)
                                    .gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build())
                    .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .activation(Activation.SOFTMAX).nIn(lstmLayerSize).nOut(nOut).build())
                    .build();

    MultiLayerNetwork mln1 = new MultiLayerNetwork(conf.clone());
    mln1.init();

    MultiLayerNetwork mln2 = new MultiLayerNetwork(conf.clone());
    mln2.init();


    assertEquals(mln1.params(), mln2.params());

    Field f = org.deeplearning4j.nn.layers.recurrent.LSTM.class.getDeclaredField("helper");
    f.setAccessible(true);

    Layer l0 = mln1.getLayer(0);
    Layer l1 = mln1.getLayer(1);
    f.set(l0, null);
    f.set(l1, null);
    assertNull(f.get(l0));
    assertNull(f.get(l1));

    l0 = mln2.getLayer(0);
    l1 = mln2.getLayer(1);
    assertTrue(f.get(l0) instanceof CudnnLSTMHelper);
    assertTrue(f.get(l1) instanceof CudnnLSTMHelper);


    INDArray out1 = mln1.output(input);
    INDArray out2 = mln2.output(input);

    assertEquals(out1, out2);

    for (int x = 0; x < 10; x++) {
        input = Nd4j.rand(new int[] {minibatch, inputSize, timeSeriesLength});
        labels = Nd4j.zeros(minibatch, nOut, timeSeriesLength);
        for (int i = 0; i < minibatch; i++) {
            for (int j = 0; j < timeSeriesLength; j++) {
                labels.putScalar(i, r.nextInt(nOut), j, 1.0);
            }
        }

        mln1.setInput(input);
        mln1.setLabels(labels);

        mln2.setInput(input);
        mln2.setLabels(labels);

        mln1.computeGradientAndScore();
        mln2.computeGradientAndScore();

        assertEquals(mln1.score(), mln2.score(), 1e-5);

        assertEquals(mln1.getFlattenedGradients(), mln2.getFlattenedGradients());

        mln1.fit(new DataSet(input, labels));
        mln2.fit(new DataSet(input, labels));

        assertEquals("Iteration: " + x, mln1.params(), mln2.params());
    }
}
 
Example 11
Source File: ActivationLayerTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testDenseActivationLayer() throws Exception {
    DataSetIterator iter = new MnistDataSetIterator(2, 2);
    DataSet next = iter.next();

    // Run without separate activation layer
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123)
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(28 * 28 * 1).nOut(10).activation(Activation.RELU)
                                    .weightInit(WeightInit.XAVIER).build())
                    .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
                                    LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER)
                                                    .activation(Activation.SOFTMAX).nIn(10).nOut(10).build())
                    .build();

    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();
    network.fit(next);


    // Run with separate activation layer
    MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123)
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(28 * 28 * 1).nOut(10).activation(Activation.IDENTITY)
                                    .weightInit(WeightInit.XAVIER).build())
                    .layer(1, new org.deeplearning4j.nn.conf.layers.ActivationLayer.Builder()
                                    .activation(Activation.RELU).build())
                    .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(10).nOut(10)
                                    .build())
                    .build();

    MultiLayerNetwork network2 = new MultiLayerNetwork(conf2);
    network2.init();
    network2.fit(next);

    // check parameters
    assertEquals(network.getLayer(0).getParam("W"), network2.getLayer(0).getParam("W"));
    assertEquals(network.getLayer(1).getParam("W"), network2.getLayer(2).getParam("W"));
    assertEquals(network.getLayer(0).getParam("b"), network2.getLayer(0).getParam("b"));
    assertEquals(network.getLayer(1).getParam("b"), network2.getLayer(2).getParam("b"));

    // check activations
    network.init();
    network.setInput(next.getFeatures());
    List<INDArray> activations = network.feedForward(true);

    network2.init();
    network2.setInput(next.getFeatures());
    List<INDArray> activations2 = network2.feedForward(true);

    assertEquals(activations.get(1).reshape(activations2.get(2).shape()), activations2.get(2));
    assertEquals(activations.get(2), activations2.get(3));


}
 
Example 12
Source File: DTypeTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testDtypesModelVsGlobalDtypeCnn1d() {
    //Nd4jCpu.Environment.getInstance().setUseMKLDNN(false);

    for (DataType globalDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
        Nd4j.setDefaultDataTypes(globalDtype, globalDtype);
        for (DataType networkDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
            for (int outputLayer = 0; outputLayer < 3; outputLayer++) {
                assertEquals(globalDtype, Nd4j.dataType());
                assertEquals(globalDtype, Nd4j.defaultFloatingPointType());

                String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype + ", outputLayer=" + outputLayer;

                Layer ol;
                Layer secondLast;
                switch (outputLayer) {
                    case 0:
                        ol = new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build();
                        secondLast = new GlobalPoolingLayer(PoolingType.MAX);
                        break;
                    case 1:
                        ol = new RnnOutputLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).nOut(5).build();
                        secondLast = new Convolution1D.Builder().kernelSize(2).nOut(5).build();
                        break;
                    case 2:
                        ol = new RnnLossLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build();
                        secondLast = new Convolution1D.Builder().kernelSize(2).nOut(5).build();
                        break;
                    default:
                        throw new RuntimeException();
                }


                MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                        .trainingWorkspaceMode(WorkspaceMode.NONE)
                        .inferenceWorkspaceMode(WorkspaceMode.NONE)
                        .dataType(networkDtype)
                        .convolutionMode(ConvolutionMode.Same)
                        .updater(new Adam(1e-2))
                        .list()
                        .layer(new Convolution1D.Builder().kernelSize(2).stride(1).nOut(3).activation(Activation.TANH).build())
                        .layer(new Subsampling1DLayer.Builder().poolingType(PoolingType.MAX).kernelSize(5).stride(1).build())
                        .layer(new Cropping1D.Builder(1).build())
                        .layer(new ZeroPadding1DLayer(1))
                        .layer(new Upsampling1D.Builder(2).build())
                        .layer(secondLast)
                        .layer(ol)
                        .setInputType(InputType.recurrent(5, 10))
                        .build();

                MultiLayerNetwork net = new MultiLayerNetwork(conf);
                net.init();

                net.initGradientsView();
                assertEquals(msg, networkDtype, net.params().dataType());
                assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType());
                assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType());

                INDArray in = Nd4j.rand(networkDtype, 2, 5, 10);
                INDArray label;
                if (outputLayer == 0) {
                    //OutputLayer
                    label = TestUtils.randomOneHot(2, 10).castTo(networkDtype);
                } else {
                    //RnnOutputLayer, RnnLossLayer
                    label = Nd4j.rand(networkDtype, 2, 5, 20);   //Longer sequence due to upsampling
                }

                INDArray out = net.output(in);
                assertEquals(msg, networkDtype, out.dataType());
                List<INDArray> ff = net.feedForward(in);
                for (int i = 0; i < ff.size(); i++) {
                    String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName());
                    assertEquals(s, networkDtype, ff.get(i).dataType());
                }

                net.setInput(in);
                net.setLabels(label);
                net.computeGradientAndScore();

                net.fit(new DataSet(in, label));

                logUsedClasses(net);

                //Now, test mismatched dtypes for input/labels:
                for (DataType inputLabelDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
                    System.out.println(msg + " - " + inputLabelDtype);
                    INDArray in2 = in.castTo(inputLabelDtype);
                    INDArray label2 = label.castTo(inputLabelDtype);
                    net.output(in2);
                    net.setInput(in2);
                    net.setLabels(label2);
                    net.computeGradientAndScore();

                    net.fit(new DataSet(in2, label2));
                }
            }
        }
    }
}
 
Example 13
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testGradientCNNMLN() {
    //Parameterized test, testing combinations of:
    // (a) activation function
    // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
    // (c) Loss function (with specified output activations)
    Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
    boolean[] characteristic = {false, true}; //If true: run some backprop steps first

    LossFunctions.LossFunction[] lossFunctions =
            {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
    Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here

    DataSet ds = new IrisDataSetIterator(150, 150).next();
    ds.normalizeZeroMeanZeroUnitVariance();
    INDArray input = ds.getFeatures();
    INDArray labels = ds.getLabels();

    for (Activation afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                LossFunctions.LossFunction lf = lossFunctions[i];
                Activation outputActivation = outputActivations[i];

                MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                        .dataType(DataType.DOUBLE)
                        .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp())
                        .weightInit(WeightInit.XAVIER).seed(12345L).list()
                        .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn)
                                .cudnnAllowFallback(false)
                                .build())
                        .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build())
                        .setInputType(InputType.convolutionalFlat(1, 4, 1));

                MultiLayerConfiguration conf = builder.build();

                MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                mln.init();
                String name = new Object() {
                }.getClass().getEnclosingMethod().getName();

                if (doLearningFirst) {
                    //Run a number of iterations of learning
                    mln.setInput(ds.getFeatures());
                    mln.setLabels(ds.getLabels());
                    mln.computeGradientAndScore();
                    double scoreBefore = mln.score();
                    for (int j = 0; j < 10; j++)
                        mln.fit(ds);
                    mln.computeGradientAndScore();
                    double scoreAfter = mln.score();
                    //Can't test in 'characteristic mode of operation' if not learning
                    String msg = name + " - score did not (sufficiently) decrease during learning - activationFn="
                            + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                            + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
                            + ", scoreAfter=" + scoreAfter + ")";
                    assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
                }

                if (PRINT_RESULTS) {
                    System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation="
                            + outputActivation + ", doLearningFirst=" + doLearningFirst);
                }

                boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                assertTrue(gradOK);
                TestUtils.testModelSerialization(mln);
            }
        }
    }
}
 
Example 14
Source File: EmbeddingLayerTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testEmbeddingSequenceBackwardPass() {
        int nClassesIn = 10;
        int embeddingDim = 5;
        int nOut = 4;
        int inputLength = 1;

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list()
                .layer(new EmbeddingSequenceLayer.Builder().inputLength(inputLength)
                        .hasBias(true).nIn(nClassesIn).nOut(embeddingDim).build())
                .layer(new RnnOutputLayer.Builder().nIn(embeddingDim).nOut(nOut).activation(Activation.SOFTMAX).build())
                .build();
        MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list()
                .layer(new DenseLayer.Builder().nIn(nClassesIn).nOut(embeddingDim).activation(Activation.IDENTITY).build())
                .layer(new RnnOutputLayer.Builder().nIn(embeddingDim).nOut(nOut).activation(Activation.SOFTMAX).build())
                .setInputType(InputType.recurrent(nClassesIn))
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
        net.init();
        net2.init();

        net2.setParams(net.params().dup());

        int batchSize = 3;
        INDArray inEmbedding = Nd4j.create(batchSize, 1);
        INDArray inOneHot = Nd4j.create(batchSize, nClassesIn, 1);
        INDArray outLabels = Nd4j.create(batchSize, 4, 1);

        Random r = new Random(1337);
        for (int i = 0; i < batchSize; i++) {
            int classIdx = r.nextInt(nClassesIn);
            inEmbedding.putScalar(i, classIdx);
            inOneHot.putScalar(new int[]{i, classIdx, 0}, 1.0);

            int labelIdx = r.nextInt(4);
            outLabels.putScalar(new int[]{i, labelIdx, 0}, 1.0);
        }

        net.setInput(inEmbedding);
        net2.setInput(inOneHot);
        net.setLabels(outLabels);
        net2.setLabels(outLabels);

        net.computeGradientAndScore();
        net2.computeGradientAndScore();

//        System.out.println(net.score() + "\t" + net2.score());
        assertEquals(net2.score(), net.score(), 1e-6);

        Map<String, INDArray> gradient = net.gradient().gradientForVariable();
        Map<String, INDArray> gradient2 = net2.gradient().gradientForVariable();
        assertEquals(gradient.size(), gradient2.size());

        for (String s : gradient.keySet()) {
            assertEquals(gradient2.get(s), gradient.get(s));
        }
    }
 
Example 15
Source File: TestCustomUpdater.java    From deeplearning4j with Apache License 2.0
@Test
public void testCustomUpdater() {

    //Create a simple custom updater, equivalent to SGD updater

    double lr = 0.03;

    Nd4j.getRandom().setSeed(12345);
    MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder().seed(12345)
                    .activation(Activation.TANH).updater(new CustomIUpdater(lr)) //Specify custom IUpdater
                    .list().layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build())
                    .layer(1, new OutputLayer.Builder().nIn(10).nOut(10)
                                    .lossFunction(LossFunctions.LossFunction.MSE).build())
                    .build();

    Nd4j.getRandom().setSeed(12345);
    MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345)
                    .activation(Activation.TANH).updater(new Sgd(lr)).list()
                    .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(1, new OutputLayer.Builder()
                                    .nIn(10).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build())
                    .build();

    //First: Check updater config
    assertTrue(((BaseLayer) conf1.getConf(0).getLayer()).getIUpdater() instanceof CustomIUpdater);
    assertTrue(((BaseLayer) conf1.getConf(1).getLayer()).getIUpdater() instanceof CustomIUpdater);
    assertTrue(((BaseLayer) conf2.getConf(0).getLayer()).getIUpdater() instanceof Sgd);
    assertTrue(((BaseLayer) conf2.getConf(1).getLayer()).getIUpdater() instanceof Sgd);

    CustomIUpdater u0_0 = (CustomIUpdater) ((BaseLayer) conf1.getConf(0).getLayer()).getIUpdater();
    CustomIUpdater u0_1 = (CustomIUpdater) ((BaseLayer) conf1.getConf(1).getLayer()).getIUpdater();
    assertEquals(lr, u0_0.getLearningRate(), 1e-6);
    assertEquals(lr, u0_1.getLearningRate(), 1e-6);

    Sgd u1_0 = (Sgd) ((BaseLayer) conf2.getConf(0).getLayer()).getIUpdater();
    Sgd u1_1 = (Sgd) ((BaseLayer) conf2.getConf(1).getLayer()).getIUpdater();
    assertEquals(lr, u1_0.getLearningRate(), 1e-6);
    assertEquals(lr, u1_1.getLearningRate(), 1e-6);


    //Second: check JSON
    String asJson = conf1.toJson();
    MultiLayerConfiguration fromJson = MultiLayerConfiguration.fromJson(asJson);
    assertEquals(conf1, fromJson);

    Nd4j.getRandom().setSeed(12345);
    MultiLayerNetwork net1 = new MultiLayerNetwork(conf1);
    net1.init();

    Nd4j.getRandom().setSeed(12345);
    MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
    net2.init();


    //Third: check gradients are equal
    INDArray in = Nd4j.rand(5, 10);
    INDArray labels = Nd4j.rand(5, 10);

    net1.setInput(in);
    net2.setInput(in);

    net1.setLabels(labels);
    net2.setLabels(labels);

    net1.computeGradientAndScore();
    net2.computeGradientAndScore();

    assertEquals(net1.getFlattenedGradients(), net2.getFlattenedGradients());
}
 
Example 16
Source File: TestMultiModelGradientApplication.java    From deeplearning4j with Apache License 2.0
@Test
public void testGradientApplyMultiLayerNetwork() {
    int minibatch = 7;
    int nIn = 10;
    int nOut = 10;

    for (boolean regularization : new boolean[] {false, true}) {
        for (IUpdater u : new IUpdater[] {new Sgd(0.1), new Nesterovs(0.1), new Adam(0.1)}) {

            MultiLayerConfiguration conf =
                            new NeuralNetConfiguration.Builder().seed(12345).activation(Activation.TANH)
                                            .weightInit(WeightInit.XAVIER).updater(u)
                                            .l1(regularization ? 0.2 : 0.0)
                                            .l2(regularization ? 0.3 : 0.0).list()
                                            .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(10).build())
                                            .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(2,
                                                            new OutputLayer.Builder(
                                                                            LossFunctions.LossFunction.MCXENT)
                                                                                            .activation(Activation.SOFTMAX)
                                                                                            .nIn(10).nOut(nOut)
                                                                                            .build())
                                            .build();


            Nd4j.getRandom().setSeed(12345);
            MultiLayerNetwork net1GradCalc = new MultiLayerNetwork(conf);
            net1GradCalc.init();

            Nd4j.getRandom().setSeed(12345);
            MultiLayerNetwork net2GradUpd = new MultiLayerNetwork(conf.clone());
            net2GradUpd.init();

            assertEquals(net1GradCalc.params(), net2GradUpd.params());

            INDArray f = Nd4j.rand(minibatch, nIn);
            INDArray l = Nd4j.create(minibatch, nOut);
            for (int i = 0; i < minibatch; i++) {
                l.putScalar(i, i % nOut, 1.0);
            }
            net1GradCalc.setInput(f);
            net1GradCalc.setLabels(l);

            net2GradUpd.setInput(f);
            net2GradUpd.setLabels(l);

            //Calculate gradient in first net, update and apply it in the second
            //Also: calculate gradient in the second net, just to be sure it isn't modified while doing updating on
            // the other net's gradient
            net1GradCalc.computeGradientAndScore();
            net2GradUpd.computeGradientAndScore();

            Gradient g = net1GradCalc.gradient();
            INDArray gBefore = g.gradient().dup(); //Net 1 gradient should be modified
            INDArray net2GradBefore = net2GradUpd.gradient().gradient().dup(); //But net 2 gradient should not be
            net2GradUpd.getUpdater().update(net2GradUpd, g, 0, 0, minibatch, LayerWorkspaceMgr.noWorkspaces());
            INDArray gAfter = g.gradient().dup();
            INDArray net2GradAfter = net2GradUpd.gradient().gradient().dup();

            assertNotEquals(gBefore, gAfter); //Net 1 gradient should be modified
            assertEquals(net2GradBefore, net2GradAfter); //But net 2 gradient should not be


            //Also: if we apply the gradient using a subi op, we should get the same final params as if we did a fit op
            // on the original network
            net2GradUpd.params().subi(g.gradient());

            net1GradCalc.fit(f, l);
            assertEquals(net1GradCalc.params(), net2GradUpd.params());


            //=============================
            if (!(u instanceof Sgd)) {
                net2GradUpd.getUpdater().getStateViewArray().assign(net1GradCalc.getUpdater().getStateViewArray());
            }
            assertEquals(net1GradCalc.params(), net2GradUpd.params());
            assertEquals(net1GradCalc.getUpdater().getStateViewArray(),
                            net2GradUpd.getUpdater().getStateViewArray());

            //Without the next 2 lines this fails, as net 1 is 1 iteration ahead
            net1GradCalc.getLayerWiseConfigurations().setIterationCount(0);
            net2GradUpd.getLayerWiseConfigurations().setIterationCount(0);

            for (int i = 0; i < 100; i++) {
                net1GradCalc.fit(f, l);
                net2GradUpd.fit(f, l);
                assertEquals(net1GradCalc.params(), net2GradUpd.params());
            }
        }
    }
}
 
Example 17
Source File: CNN1DGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testCnn1dWithMasking(){
    int length = 12;
    int convNIn = 2;
    int convNOut1 = 3;
    int convNOut2 = 4;
    int finalNOut = 3;

    int pnorm = 2;

    SubsamplingLayer.PoolingType[] poolingTypes =
            new SubsamplingLayer.PoolingType[] {SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG};

    for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
        for(ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Same, ConvolutionMode.Truncate}){
            for( int stride : new int[]{1, 2}){
                String s = cm + ", stride=" + stride + ", pooling=" + poolingType;
                log.info("Starting test: " + s);
                Nd4j.getRandom().setSeed(12345);

                MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                        .dataType(DataType.DOUBLE)
                        .updater(new NoOp())
                        .activation(Activation.TANH)
                        .dist(new NormalDistribution(0, 1)).convolutionMode(cm)
                        .seed(12345)
                        .list()
                        .layer(new Convolution1DLayer.Builder().kernelSize(2)
                                .stride(stride).nIn(convNIn).nOut(convNOut1)
                                .build())
                        .layer(new Subsampling1DLayer.Builder(poolingType).kernelSize(2)
                                .stride(stride).pnorm(pnorm).build())
                        .layer(new Convolution1DLayer.Builder().kernelSize(2)
                                .stride(stride).nIn(convNOut1).nOut(convNOut2)
                                .build())
                        .layer(new GlobalPoolingLayer(PoolingType.AVG))
                        .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                .activation(Activation.SOFTMAX).nOut(finalNOut).build())
                        .setInputType(InputType.recurrent(convNIn, length)).build();

                MultiLayerNetwork net = new MultiLayerNetwork(conf);
                net.init();

                INDArray f = Nd4j.rand(new int[]{2, convNIn, length});
                INDArray fm = Nd4j.create(2, length);
                fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1);
                fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0,6)).assign(1);

                INDArray label = TestUtils.randomOneHot(2, finalNOut);

                boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(f)
                        .labels(label).inputMask(fm));

                assertTrue(s, gradOK);
                TestUtils.testModelSerialization(net);

                //TODO also check that masked step values don't impact forward pass, score or gradients

                DataSet ds = new DataSet(f,label,fm,null);
                double scoreBefore = net.score(ds);
                net.setInput(f);
                net.setLabels(label);
                net.setLayerMaskArrays(fm, null);
                net.computeGradientAndScore();
                INDArray gradBefore = net.getFlattenedGradients().dup();
                f.putScalar(1, 0, 10, 10.0);
                f.putScalar(1, 1, 11, 20.0);
                double scoreAfter = net.score(ds);
                net.setInput(f);
                net.setLabels(label);
                net.setLayerMaskArrays(fm, null);
                net.computeGradientAndScore();
                INDArray gradAfter = net.getFlattenedGradients().dup();

                assertEquals(scoreBefore, scoreAfter, 1e-6);
                assertEquals(gradBefore, gradAfter);
            }
        }
    }
}
 
Example 18
Source File: CuDNNGradientChecks.java    From deeplearning4j with Apache License 2.0
@Test
public void testConvolutional() throws Exception {

    //Parameterized test, testing combinations of:
    // (a) activation function
    // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
    // (c) Loss function (with specified output activations)
    Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
    boolean[] characteristic = {false, true}; //If true: run some backprop steps first

    int[] minibatchSizes = {1, 4};
    int width = 6;
    int height = 6;
    int inputDepth = 2;
    int nOut = 3;

    Field f = org.deeplearning4j.nn.layers.convolution.ConvolutionLayer.class.getDeclaredField("helper");
    f.setAccessible(true);

    Random r = new Random(12345);
    for (Activation afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int minibatchSize : minibatchSizes) {

                INDArray input = Nd4j.rand(new int[] {minibatchSize, inputDepth, height, width});
                INDArray labels = Nd4j.zeros(minibatchSize, nOut);
                for (int i = 0; i < minibatchSize; i++) {
                    labels.putScalar(i, r.nextInt(nOut), 1.0);
                }

                MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                        .dataType(DataType.DOUBLE)
                        .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                        .dist(new UniformDistribution(-1, 1))
                        .updater(new NoOp()).seed(12345L).list()
                        .layer(0, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(1, 1).nOut(3)
                                .activation(afn).build())
                        .layer(1, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(0, 0).nOut(3)
                                .activation(afn).build())
                        .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                .activation(Activation.SOFTMAX).nOut(nOut).build())
                        .setInputType(InputType.convolutional(height, width, inputDepth));

                MultiLayerConfiguration conf = builder.build();

                MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                mln.init();

                org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c0 =
                                (org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) mln.getLayer(0);
                ConvolutionHelper ch0 = (ConvolutionHelper) f.get(c0);
                assertTrue(ch0 instanceof CudnnConvolutionHelper);

                org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c1 =
                                (org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) mln.getLayer(1);
                ConvolutionHelper ch1 = (ConvolutionHelper) f.get(c1);
                assertTrue(ch1 instanceof CudnnConvolutionHelper);

                //-------------------------------
                //For debugging/comparison to no-cudnn case: set helper field to null
                //                    f.set(c0, null);
                //                    f.set(c1, null);
                //                    assertNull(f.get(c0));
                //                    assertNull(f.get(c1));
                //-------------------------------


                String name = new Object() {}.getClass().getEnclosingMethod().getName();

                if (doLearningFirst) {
                    //Run a number of iterations of learning
                    mln.setInput(input);
                    mln.setLabels(labels);
                    mln.computeGradientAndScore();
                    double scoreBefore = mln.score();
                    for (int j = 0; j < 10; j++)
                        mln.fit(input, labels);
                    mln.computeGradientAndScore();
                    double scoreAfter = mln.score();
                    //Can't test in 'characteristic mode of operation' if not learning
                    String msg = name + " - score did not (sufficiently) decrease during learning - activationFn="
                                    + afn + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
                                    + ", scoreAfter=" + scoreAfter + ")";
                    assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
                }

                if (PRINT_RESULTS) {
                    System.out.println(name + " - activationFn=" + afn + ", doLearningFirst=" + doLearningFirst);
                    for (int j = 0; j < mln.getnLayers(); j++)
                        System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                }

                boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                assertTrue(gradOK);
            }
        }
    }
}
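
In the doLearningFirst branch above, setInput(...)/setLabels(...)/computeGradientAndScore() are used only to read a score before and after a few fit(...) calls, to confirm the network is actually learning before the gradient check. Below is a minimal, CPU-only sketch of that pattern on a small dense network; the configuration, data shapes and the class name ScoreBeforeAfterSketch are illustrative assumptions and are not taken from CuDNNGradientChecks.

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class ScoreBeforeAfterSketch {
    public static void main(String[] args) {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
                .updater(new Sgd(0.1))
                .list()
                .layer(new DenseLayer.Builder().nIn(4).nOut(8).activation(Activation.TANH).build())
                .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(8).nOut(3).build())
                .build();
        MultiLayerNetwork mln = new MultiLayerNetwork(conf);
        mln.init();

        //Random features and arbitrary one-hot labels, purely illustrative
        INDArray input = Nd4j.rand(16, 4);
        INDArray labels = Nd4j.zeros(16, 3);
        for (int i = 0; i < 16; i++) {
            labels.putScalar(i, i % 3, 1.0);
        }

        //Baseline: attach data, run one forward/backward pass without updating parameters
        mln.setInput(input);
        mln.setLabels(labels);
        mln.computeGradientAndScore();
        double scoreBefore = mln.score();

        //A few learning steps
        for (int j = 0; j < 10; j++) {
            mln.fit(input, labels);
        }

        //Re-attach the same data and read the score again; it should have decreased
        mln.setInput(input);
        mln.setLabels(labels);
        mln.computeGradientAndScore();
        double scoreAfter = mln.score();

        System.out.println("before=" + scoreBefore + ", after=" + scoreAfter);
    }
}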
 
Example 19
Source File: OutputLayerTest.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
public void testCnnLossLayer(){

    for(WorkspaceMode ws : WorkspaceMode.values()) {
        log.info("*** Testing workspace: " + ws);

        for (Activation a : new Activation[]{Activation.TANH, Activation.SELU}) {
            //Check that (A+identity) is equal to (identity+A), for activation A
            //i.e., should get same output and weight gradients for both

            MultiLayerConfiguration conf1 =
                    new NeuralNetConfiguration.Builder().seed(12345L)
                            .updater(new NoOp())
                            .convolutionMode(ConvolutionMode.Same)
                            .inferenceWorkspaceMode(ws)
                            .trainingWorkspaceMode(ws)
                            .list()
                            .layer(new ConvolutionLayer.Builder().nIn(3).nOut(4).activation(Activation.IDENTITY)
                                    .kernelSize(2, 2).stride(1, 1)
                                    .dist(new NormalDistribution(0, 1.0))
                                    .updater(new NoOp()).build())
                            .layer(new CnnLossLayer.Builder(LossFunction.MSE)
                                    .activation(a)
                                    .build())
                            .build();

            MultiLayerConfiguration conf2 =
                    new NeuralNetConfiguration.Builder().seed(12345L)
                            .updater(new NoOp())
                            .convolutionMode(ConvolutionMode.Same)
                            .inferenceWorkspaceMode(ws)
                            .trainingWorkspaceMode(ws)
                            .list()
                            .layer(new ConvolutionLayer.Builder().nIn(3).nOut(4).activation(a)
                                    .kernelSize(2, 2).stride(1, 1)
                                    .dist(new NormalDistribution(0, 1.0))
                                    .updater(new NoOp()).build())
                            .layer(new CnnLossLayer.Builder(LossFunction.MSE)
                                    .activation(Activation.IDENTITY)
                                    .build())
                            .build();

            MultiLayerNetwork mln = new MultiLayerNetwork(conf1);
            mln.init();

            MultiLayerNetwork mln2 = new MultiLayerNetwork(conf2);
            mln2.init();


            mln2.setParams(mln.params());


            INDArray in = Nd4j.rand(new int[]{3, 3, 5, 5});

            INDArray out1 = mln.output(in);
            INDArray out2 = mln2.output(in);

            assertEquals(out1, out2);

            INDArray labels = Nd4j.rand(out1.shape());

            mln.setInput(in);
            mln.setLabels(labels);

            mln2.setInput(in);
            mln2.setLabels(labels);

            mln.computeGradientAndScore();
            mln2.computeGradientAndScore();

            assertEquals(mln.score(), mln2.score(), 1e-6);
            assertEquals(mln.gradient().gradient(), mln2.gradient().gradient());

            //Also check computeScoreForExamples
            INDArray in2a = Nd4j.rand(new int[]{1, 3, 5, 5});
            INDArray labels2a = Nd4j.rand(new int[]{1, 4, 5, 5});

            INDArray in2 = Nd4j.concat(0, in2a, in2a);
            INDArray labels2 = Nd4j.concat(0, labels2a, labels2a);

            INDArray s = mln.scoreExamples(new DataSet(in2, labels2), false);
            assertArrayEquals(new long[]{2, 1}, s.shape());
            assertEquals(s.getDouble(0), s.getDouble(1), 1e-6);

            TestUtils.testModelSerialization(mln);
        }
    }
}
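
This test compares two networks that are supposed to compute the same function by copying the parameters of one into the other and then attaching identical data to both via setInput(...)/setLabels(...) before computeGradientAndScore(). A small helper capturing that comparison idiom might look like the sketch below; the class and method names are hypothetical, and the helper assumes both networks have identical parameter layouts (same shapes in the same order).

import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.ndarray.INDArray;

public class NetworkComparisonSketch {

    /**
     * Copies the parameters of network a into network b, attaches the same features and
     * labels to both, runs one forward/backward pass on each, and returns the absolute
     * score difference. For equivalent architectures this should be (near) zero.
     */
    public static double scoreDifference(MultiLayerNetwork a, MultiLayerNetwork b,
                                         INDArray features, INDArray labels) {
        //Assumes a and b have identical parameter layouts
        b.setParams(a.params());

        a.setInput(features);
        a.setLabels(labels);
        b.setInput(features);
        b.setLabels(labels);

        a.computeGradientAndScore();
        b.computeGradientAndScore();

        return Math.abs(a.score() - b.score());
    }
}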
 
Example 20
Source File: EmbeddingLayerTest.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
public void testEmbeddingBackwardPass() {
    //With the same parameters, embedding layer should have same activations as the equivalent one-hot representation
    // input with a DenseLayer

    int nClassesIn = 10;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list()
            .layer(0, new EmbeddingLayer.Builder().hasBias(true).nIn(nClassesIn).nOut(5).build()).layer(1,
                    new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(4)
                            .activation(Activation.SOFTMAX).build())
            .build();
    MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().activation(Activation.TANH)
            .weightInit(WeightInit.XAVIER).list()
            .layer(new DenseLayer.Builder().nIn(nClassesIn).nOut(5).activation(Activation.IDENTITY).build())
            .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(4)
                            .activation(Activation.SOFTMAX).build())
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
    net.init();
    net2.init();

    net2.setParams(net.params().dup());

    int batchSize = 3;
    INDArray inEmbedding = Nd4j.create(batchSize, 1);
    INDArray inOneHot = Nd4j.create(batchSize, nClassesIn);
    INDArray outLabels = Nd4j.create(batchSize, 4);

    Random r = new Random(12345);
    for (int i = 0; i < batchSize; i++) {
        int classIdx = r.nextInt(nClassesIn);
        inEmbedding.putScalar(i, classIdx);
        inOneHot.putScalar(new int[]{i, classIdx}, 1.0);

        int labelIdx = r.nextInt(4);
        outLabels.putScalar(new int[]{i, labelIdx}, 1.0);
    }

    net.setInput(inEmbedding);
    net2.setInput(inOneHot);
    net.setLabels(outLabels);
    net2.setLabels(outLabels);

    net.computeGradientAndScore();
    net2.computeGradientAndScore();

    assertEquals(net2.score(), net.score(), 1e-6);

    Map<String, INDArray> gradient = net.gradient().gradientForVariable();
    Map<String, INDArray> gradient2 = net2.gradient().gradientForVariable();
    assertEquals(gradient.size(), gradient2.size());

    for (String s : gradient.keySet()) {
        assertEquals(gradient2.get(s), gradient.get(s));
    }
}
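
The comment at the top of this test says the embedding layer should produce the same activations as the equivalent one-hot DenseLayer setup, but the assertions only cover the score and per-variable gradients. As a hedged follow-up, the forward-pass outputs can be compared as well; this fragment reuses net, net2, inEmbedding and inOneHot exactly as defined above.

    //With identical parameters, looking up row i of the embedding weights should match
    //multiplying the weight matrix by the one-hot vector for class i, so outputs should be equal
    INDArray outEmbedding = net.output(inEmbedding);
    INDArray outOneHot = net2.output(inOneHot);
    assertEquals(outOneHot, outEmbedding);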