Java Code Examples for org.nd4j.linalg.activations.Activation#IDENTITY

The following examples show how to use org.nd4j.linalg.activations.Activation#IDENTITY. You can go to the original project or source file by following the links above each example.
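Before the project examples, a minimal orientation sketch (not taken from any of the projects below): Activation.IDENTITY is the linear, no-op activation, so applying its activation function returns the input unchanged. The sketch assumes the usual ND4J imports (Activation, IActivation, INDArray, Nd4j).

IActivation identity = Activation.IDENTITY.getActivationFunction();
INDArray in = Nd4j.linspace(-2, 2, 5);                  // [-2.0, -1.0, 0.0, 1.0, 2.0]
INDArray out = identity.getActivation(in.dup(), false); // identity activation: output equals the input (dup() keeps 'in' untouched)
// IDENTITY is typically chosen where the output should stay linear, e.g. regression output layers paired with MSE loss.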
Example 1
Source File: KerasActivationUtils.java    From deeplearning4j with Apache License 2.0
/**
 * Map Keras to DL4J activation functions.
 *
 * @param conf Keras layer configuration
 * @param kerasActivation String containing Keras activation function name
 * @return Activation enum value containing DL4J activation function name
 */
public static Activation mapToActivation(String kerasActivation, KerasLayerConfiguration conf)
        throws UnsupportedKerasConfigurationException {
    Activation dl4jActivation;
    if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SOFTMAX())) {
        dl4jActivation = Activation.SOFTMAX;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SOFTPLUS())) {
        dl4jActivation = Activation.SOFTPLUS;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SOFTSIGN())) {
        dl4jActivation = Activation.SOFTSIGN;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_RELU())) {
        dl4jActivation = Activation.RELU;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_RELU6())) {
        dl4jActivation = Activation.RELU6;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_ELU())) {
        dl4jActivation = Activation.ELU;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SELU())) {
        dl4jActivation = Activation.SELU;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_TANH())) {
        dl4jActivation = Activation.TANH;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SIGMOID())) {
        dl4jActivation = Activation.SIGMOID;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_HARD_SIGMOID())) {
        dl4jActivation = Activation.HARDSIGMOID;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_LINEAR())) {
        dl4jActivation = Activation.IDENTITY;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SWISH())) {
        dl4jActivation = Activation.SWISH;
    } else {
        throw new UnsupportedKerasConfigurationException(
                "Unknown Keras activation function " + kerasActivation);
    }
    return dl4jActivation;
}
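The branch relevant to this page is the getKERAS_ACTIVATION_LINEAR() case: Keras' "linear" (no-op) activation is imported as Activation.IDENTITY. A minimal usage sketch, assuming the Keras 2 configuration class (Keras2LayerConfiguration) from the DL4J Keras model-import module:

KerasLayerConfiguration conf = new Keras2LayerConfiguration();
// Keras "linear" maps to DL4J IDENTITY; unknown names throw UnsupportedKerasConfigurationException
Activation a = KerasActivationUtils.mapToActivation(conf.getKERAS_ACTIVATION_LINEAR(), conf);
// a == Activation.IDENTITY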
 
Example 2
Source File: GaussianReconstructionDistribution.java    From deeplearning4j with Apache License 2.0
/**
 * Create a GaussianReconstructionDistribution with the default identity activation function.
 */
public GaussianReconstructionDistribution() {
    this(Activation.IDENTITY);
}
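The no-argument constructor simply delegates to the Activation.IDENTITY variant. A hedged sketch of where that default typically ends up, wiring the distribution into a variational autoencoder layer (the layer sizes below are illustrative only):

VariationalAutoencoder vaeLayer = new VariationalAutoencoder.Builder()
        .nIn(784).nOut(32)
        .encoderLayerSizes(256, 128)
        .decoderLayerSizes(128, 256)
        // equivalent to passing new GaussianReconstructionDistribution(Activation.IDENTITY)
        .reconstructionDistribution(new GaussianReconstructionDistribution())
        .build();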
 
Example 3
Source File: GradientCheckTests.java    From deeplearning4j with Apache License 2.0
@Test
    public void elementWiseMultiplicationLayerTest(){

        for(Activation a : new Activation[]{Activation.IDENTITY, Activation.TANH}) {

            ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                    .dataType(DataType.DOUBLE)
                    .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp())
                    .seed(12345L)
                    .weightInit(new UniformDistribution(0, 1))
                    .graphBuilder()
                    .addInputs("features")
                    .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(4)
                            .activation(Activation.TANH)
                            .build(), "features")
                    .addLayer("elementWiseMul", new ElementWiseMultiplicationLayer.Builder().nIn(4).nOut(4)
                            .activation(a)
                            .build(), "dense")
                    .addLayer("loss", new LossLayer.Builder(LossFunctions.LossFunction.COSINE_PROXIMITY)
                            .activation(Activation.IDENTITY).build(), "elementWiseMul")
                    .setOutputs("loss")
                    .build();

            ComputationGraph netGraph = new ComputationGraph(conf);
            netGraph.init();

            log.info("params before learning: " + netGraph.getLayer(1).paramTable());

            //Run a number of iterations of learning; manually make some pseudo data
            //The idea is simple: since the element-wise multiplication layer is just a scaling, we want the cosine similarity
            // to be decided mainly by the fourth value; if everything runs well, we will get a large weight for the fourth value

            INDArray features = Nd4j.create(new double[][]{{1, 2, 3, 4}, {1, 2, 3, 1}, {1, 2, 3, 0}});
            INDArray labels = Nd4j.create(new double[][]{{1, 1, 1, 8}, {1, 1, 1, 2}, {1, 1, 1, 1}});

            netGraph.setInputs(features);
            netGraph.setLabels(labels);
            netGraph.computeGradientAndScore();
            double scoreBefore = netGraph.score();

            String msg;
            for (int epoch = 0; epoch < 5; epoch++)
                netGraph.fit(new INDArray[]{features}, new INDArray[]{labels});
            netGraph.computeGradientAndScore();
            double scoreAfter = netGraph.score();
            //Can't test in 'characteristic mode of operation' if not learning
            msg = "elementWiseMultiplicationLayerTest() - score did not (sufficiently) decrease during learning - activationFn="
                    + "Id" + ", lossFn=" + "Cos-sim" + ", outputActivation=" + "Id"
                    + ", doLearningFirst=" + "true" + " (before=" + scoreBefore
                    + ", scoreAfter=" + scoreAfter + ")";
            assertTrue(msg, scoreAfter < 0.8 * scoreBefore);

            //Expectation for the linear regression case (with only an element-wise multiplication layer): a large weight for the fourth value
            log.info("params after learning: " + netGraph.getLayer(1).paramTable());

            boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(netGraph).inputs(new INDArray[]{features})
                    .labels(new INDArray[]{labels}));

            msg = "elementWiseMultiplicationLayerTest() - activationFn=" + "ID" + ", lossFn=" + "Cos-sim"
                    + ", outputActivation=" + "Id" + ", doLearningFirst=" + "true";
            assertTrue(msg, gradOK);

            TestUtils.testModelSerialization(netGraph);
        }
    }
 
Example 4
Source File: GradientCheckTestsMasking.java    From deeplearning4j with Apache License 2.0
@Test
public void gradientCheckMaskingOutputSimple() {

    int timeSeriesLength = 5;
    boolean[][] mask = new boolean[5][0];
    mask[0] = new boolean[] {true, true, true, true, true}; //No masking
    mask[1] = new boolean[] {false, true, true, true, true}; //mask first output time step
    mask[2] = new boolean[] {false, false, false, false, true}; //time series classification: mask all but last
    mask[3] = new boolean[] {false, false, true, false, true}; //time series classification w/ variable length TS
    mask[4] = new boolean[] {true, true, true, false, true}; //variable length TS

    int nIn = 3;
    int layerSize = 3;

    GradientCheckSimpleScenario[] scenarios = new GradientCheckSimpleScenario[] {
                    new GradientCheckSimpleScenario(LossFunctions.LossFunction.MCXENT.getILossFunction(),
                                    Activation.SOFTMAX, 2, 2),
                    new GradientCheckSimpleScenario(LossMixtureDensity.builder().gaussians(2).labelWidth(3).build(),
                                    Activation.TANH, 10, 3),
                    new GradientCheckSimpleScenario(LossMixtureDensity.builder().gaussians(2).labelWidth(4).build(),
                                    Activation.IDENTITY, 12, 4)};

    for (GradientCheckSimpleScenario s : scenarios) {

        Random r = new Random(12345L);
        INDArray input = Nd4j.rand(DataType.DOUBLE, 1, nIn, timeSeriesLength).subi(0.5);

        INDArray labels = Nd4j.zeros(DataType.DOUBLE, 1, s.labelWidth, timeSeriesLength);
        for (int m = 0; m < 1; m++) {
            for (int j = 0; j < timeSeriesLength; j++) {
                int idx = r.nextInt(s.labelWidth);
                labels.putScalar(new int[] {m, idx, j}, 1.0f);
            }
        }

        for (int i = 0; i < mask.length; i++) {

            //Create mask array:
            INDArray maskArr = Nd4j.create(1, timeSeriesLength);
            for (int j = 0; j < mask[i].length; j++) {
                maskArr.putScalar(new int[] {0, j}, mask[i][j] ? 1.0 : 0.0);
            }

            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L)
                    .dataType(DataType.DOUBLE)
                    .updater(new NoOp())
                    .list()
                    .layer(0, new SimpleRnn.Builder().nIn(nIn).nOut(layerSize)
                            .weightInit(new NormalDistribution(0, 1)).build())
                    .layer(1, new RnnOutputLayer.Builder(s.lf).activation(s.act).nIn(layerSize).nOut(s.nOut)
                            .weightInit(new NormalDistribution(0, 1)).build())
                    .build();
            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
            mln.init();

            boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(mln).input(input)
                    .labels(labels).labelMask(maskArr));

            String msg = "gradientCheckMaskingOutputSimple() - timeSeriesLength=" + timeSeriesLength
                            + ", miniBatchSize=" + 1;
            assertTrue(msg, gradOK);
            TestUtils.testModelSerialization(mln);
        }
    }
}
 
Example 5
Source File: BNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientBNWithCNNandSubsamplingCompGraph() {
        //Parameterized test, testing combinations of:
        // (a) activation function
        // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
        // (c) Loss function (with specified output activations)
        // (d) l1 and l2 values
        Activation[] activFns = {Activation.TANH, Activation.IDENTITY};
        boolean doLearningFirst = true;

        LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD};
        Activation[] outputActivations = {Activation.SOFTMAX}; //i.e., lossFunctions[i] used with outputActivations[i] here

        double[] l2vals = {0.0, 0.1};
        double[] l1vals = {0.0, 0.2}; //i.e., use l2vals[j] with l1vals[j]

        Nd4j.getRandom().setSeed(12345);
        int minibatch = 10;
        int depth = 2;
        int hw = 5;
        int nOut = 3;
        INDArray input = Nd4j.rand(new int[]{minibatch, depth, hw, hw});
        INDArray labels = Nd4j.zeros(minibatch, nOut);
        Random r = new Random(12345);
        for (int i = 0; i < minibatch; i++) {
            labels.putScalar(i, r.nextInt(nOut), 1.0);
        }

        DataSet ds = new DataSet(input, labels);

        for (boolean useLogStd : new boolean[]{true, false}) {
            for (Activation afn : activFns) {
                for (int i = 0; i < lossFunctions.length; i++) {
                    for (int j = 0; j < l2vals.length; j++) {
                        LossFunctions.LossFunction lf = lossFunctions[i];
                        Activation outputActivation = outputActivations[i];

                        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
                                .dataType(DataType.DOUBLE)
                                .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
                                .updater(new NoOp())
                                .dist(new UniformDistribution(-2, 2)).seed(12345L).graphBuilder()
                                .addInputs("in")
                                .addLayer("0", new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3)
                                        .activation(afn).build(), "in")
                                .addLayer("1", new BatchNormalization.Builder().useLogStd(useLogStd).build(), "0")
                                .addLayer("2", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                                        .kernelSize(2, 2).stride(1, 1).build(), "1")
                                .addLayer("3", new BatchNormalization.Builder().useLogStd(useLogStd).build(), "2")
                                .addLayer("4", new ActivationLayer.Builder().activation(afn).build(), "3")
                                .addLayer("5", new OutputLayer.Builder(lf).activation(outputActivation)
                                        .nOut(nOut).build(), "4")
                                .setOutputs("5").setInputTypes(InputType.convolutional(hw, hw, depth))
                                .build();

                        ComputationGraph net = new ComputationGraph(conf);
                        net.init();
                        String name = new Object() {
                        }.getClass().getEnclosingMethod().getName();

                        if (doLearningFirst) {
                            //Run a number of iterations of learning
                            net.setInput(0, ds.getFeatures());
                            net.setLabels(ds.getLabels());
                            net.computeGradientAndScore();
                            double scoreBefore = net.score();
                            for (int k = 0; k < 20; k++)
                                net.fit(ds);
                            net.computeGradientAndScore();
                            double scoreAfter = net.score();
                            //Can't test in 'characteristic mode of operation' if not learning
                            String msg = name
                                    + " - score did not (sufficiently) decrease during learning - activationFn="
                                    + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                    + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
                                    + ", scoreAfter=" + scoreAfter + ")";
                            assertTrue(msg, scoreAfter < 0.9 * scoreBefore);
                        }

                        System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf
                                + ", outputActivation=" + outputActivation + ", doLearningFirst="
                                + doLearningFirst + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]);
//                        for (int k = 0; k < net.getNumLayers(); k++)
//                            System.out.println("Layer " + k + " # params: " + net.getLayer(k).numParams());

                        //Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc
                        //i.e., runningMean = decay * runningMean + (1-decay) * batchMean
                        //However, numerical gradient will be 0 as forward pass doesn't depend on this "parameter"
                        Set<String> excludeParams = new HashSet<>(Arrays.asList("1_mean", "1_var", "3_mean", "3_var", "1_log10stdev", "3_log10stdev"));
                        boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(net).inputs(new INDArray[]{input})
                                .labels(new INDArray[]{labels}).excludeParams(excludeParams));

                        assertTrue(gradOK);
                        TestUtils.testModelSerialization(net);
                    }
                }
            }
        }
    }
 
Example 6
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientCNNL1L2MLN() {
        if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format...
            return;

        //Parameterized test, testing combinations of:
        // (a) activation function
        // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
        // (c) Loss function (with specified output activations)

        DataSet ds = new IrisDataSetIterator(150, 150).next();
        ds.normalizeZeroMeanZeroUnitVariance();
        INDArray input = ds.getFeatures();
        INDArray labels = ds.getLabels();

        //use l2vals[i] with l1vals[i]
        double[] l2vals = {0.4, 0.0, 0.4, 0.4};
        double[] l1vals = {0.0, 0.0, 0.5, 0.0};
        double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
        double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
        Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS};
        boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first

        LossFunctions.LossFunction[] lossFunctions =
                {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here

        for( int i=0; i<l2vals.length; i++ ){
            Activation afn = activFns[i];
            boolean doLearningFirst = characteristic[i];
            LossFunctions.LossFunction lf = lossFunctions[i];
            Activation outputActivation = outputActivations[i];
            double l2 = l2vals[i];
            double l1 = l1vals[i];

            MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                    .dataType(DataType.DOUBLE)
                    .l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i])
                    .optimizationAlgo(
                            OptimizationAlgorithm.CONJUGATE_GRADIENT)
                    .seed(12345L).list()
                    .layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6)
                            .weightInit(WeightInit.XAVIER).activation(afn)
                            .updater(new NoOp()).build())
                    .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
                            .weightInit(WeightInit.XAVIER).updater(new NoOp()).build())

                    .setInputType(InputType.convolutionalFlat(1, 4, 1));

            MultiLayerConfiguration conf = builder.build();

            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
            mln.init();
            String testName = new Object() {
            }.getClass().getEnclosingMethod().getName();

            if (doLearningFirst) {
                //Run a number of iterations of learning
                mln.setInput(ds.getFeatures());
                mln.setLabels(ds.getLabels());
                mln.computeGradientAndScore();
                double scoreBefore = mln.score();
                for (int j = 0; j < 10; j++)
                    mln.fit(ds);
                mln.computeGradientAndScore();
                double scoreAfter = mln.score();
                //Can't test in 'characteristic mode of operation' if not learning
                String msg = testName
                        + "- score did not (sufficiently) decrease during learning - activationFn="
                        + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                        + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
                        + ", scoreAfter=" + scoreAfter + ")";
                assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
            }

            if (PRINT_RESULTS) {
                System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf
                        + ", outputActivation=" + outputActivation + ", doLearningFirst="
                        + doLearningFirst);
//                for (int j = 0; j < mln.getnLayers(); j++)
//                    System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
            }

            boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

            assertTrue(gradOK);
            TestUtils.testModelSerialization(mln);
        }
    }
 
Example 7
Source File: VaeGradientCheckTests.java    From deeplearning4j with Apache License 2.0
@Test
    public void testVaeAsMLP() {
        //Post pre-training: a VAE can be used as a MLP, by taking the mean value from p(z|x) as the output
        //This gradient check tests this part

        Activation[] activFns = {Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH};

        LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MCXENT, LossFunction.MSE, LossFunction.MSE, LossFunction.MCXENT, LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.SOFTMAX, Activation.TANH, Activation.TANH, Activation.SOFTMAX, Activation.TANH};

        //use l2vals[i] with l1vals[i]
        double[] l2vals = {0.4, 0.0, 0.4, 0.4, 0.0, 0.0};
        double[] l1vals = {0.0, 0.0, 0.5, 0.0, 0.0, 0.5};
        double[] biasL2 = {0.0, 0.0, 0.0, 0.2, 0.0, 0.4};
        double[] biasL1 = {0.0, 0.0, 0.6, 0.0, 0.0, 0.0};

        int[][] encoderLayerSizes = new int[][] {{5}, {5}, {5, 6}, {5, 6}, {5}, {5, 6}};
        int[][] decoderLayerSizes = new int[][] {{6}, {7, 8}, {6}, {7, 8}, {6}, {7, 8}};

        int[] minibatches = new int[]{1,5,4,3,1,4};

        Nd4j.getRandom().setSeed(12345);
        for( int i=0; i<activFns.length; i++ ){
            LossFunction lf = lossFunctions[i];
            Activation outputActivation = outputActivations[i];
            double l2 = l2vals[i];
            double l1 = l1vals[i];
            int[] encoderSizes = encoderLayerSizes[i];
            int[] decoderSizes = decoderLayerSizes[i];
            int minibatch = minibatches[i];
            INDArray input = Nd4j.rand(minibatch, 4);
            INDArray labels = Nd4j.create(minibatch, 3);
            for (int j = 0; j < minibatch; j++) {
                labels.putScalar(j, j % 3, 1.0);
            }
            Activation afn = activFns[i];

            MultiLayerConfiguration conf =
                    new NeuralNetConfiguration.Builder().l2(l2).l1(l1)
                            .dataType(DataType.DOUBLE)
                            .updater(new NoOp())
                            .l2Bias(biasL2[i]).l1Bias(biasL1[i])
                            .updater(new NoOp()).seed(12345L).list()
                            .layer(0, new VariationalAutoencoder.Builder().nIn(4)
                                    .nOut(3).encoderLayerSizes(encoderSizes)
                                    .decoderLayerSizes(decoderSizes)

                                    .dist(new NormalDistribution(0, 1))
                                    .activation(afn)
                                    .build())
                            .layer(1, new OutputLayer.Builder(lf)
                                    .activation(outputActivation).nIn(3).nOut(3)

                                    .dist(new NormalDistribution(0, 1))
                                    .build())
                            .build();

            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
            mln.init();

            String msg = "testVaeAsMLP() - activationFn=" + afn + ", lossFn=" + lf
                    + ", outputActivation=" + outputActivation + ", encLayerSizes = "
                    + Arrays.toString(encoderSizes) + ", decLayerSizes = "
                    + Arrays.toString(decoderSizes) + ", l2=" + l2 + ", l1=" + l1;
            if (PRINT_RESULTS) {
                System.out.println(msg);
//                for (int j = 0; j < mln.getnLayers(); j++)
//                    System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
            }

            boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input,
                    labels);
            assertTrue(msg, gradOK);
            TestUtils.testModelSerialization(mln);
        }
    }
 
Example 8
Source File: CNN3DGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testDeconv3d() {
    Nd4j.getRandom().setSeed(12345);
    // Note: we checked this with a variety of parameters, but it takes a lot of time.
    int[] depths = {8, 8, 9};
    int[] heights = {8, 9, 9};
    int[] widths = {8, 8, 9};


    int[][] kernels = {{2, 2, 2}, {3, 3, 3}, {2, 3, 2}};
    int[][] strides = {{1, 1, 1}, {1, 1, 1}, {2, 2, 2}};

    Activation[] activations = {Activation.SIGMOID, Activation.TANH, Activation.IDENTITY};

    ConvolutionMode[] modes = {ConvolutionMode.Truncate, ConvolutionMode.Same, ConvolutionMode.Same};
    int[] mbs = {1, 3, 2};
    Convolution3D.DataFormat[] dataFormats = new Convolution3D.DataFormat[]{Convolution3D.DataFormat.NCDHW, Convolution3D.DataFormat.NDHWC, Convolution3D.DataFormat.NCDHW};

    int convNIn = 2;
    int finalNOut = 2;
    int[] deconvOut = {2, 3, 4};

    for (int i = 0; i < activations.length; i++) {
        Activation afn = activations[i];
        int miniBatchSize = mbs[i];
        int depth = depths[i];
        int height = heights[i];
        int width = widths[i];
        ConvolutionMode mode = modes[i];
        int[] kernel = kernels[i];
        int[] stride = strides[i];
        Convolution3D.DataFormat df = dataFormats[i];
        int dOut = deconvOut[i];

        INDArray input;
        if (df == Convolution3D.DataFormat.NDHWC) {
            input = Nd4j.rand(new int[]{miniBatchSize, depth, height, width, convNIn});
        } else {
            input = Nd4j.rand(new int[]{miniBatchSize, convNIn, depth, height, width});
        }
        INDArray labels = Nd4j.zeros(miniBatchSize, finalNOut);
        for (int j = 0; j < miniBatchSize; j++) {
            labels.putScalar(new int[]{j, j % finalNOut}, 1.0);
        }

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .dataType(DataType.DOUBLE)
                .updater(new NoOp())
                .weightInit(new NormalDistribution(0, 0.1))
                .list()
                .layer(0, new Convolution3D.Builder().activation(afn).kernelSize(kernel)
                        .stride(stride).nIn(convNIn).nOut(dOut).hasBias(false)
                        .convolutionMode(mode).dataFormat(df)
                        .build())
                .layer(1, new Deconvolution3D.Builder().activation(afn).kernelSize(kernel)
                        .stride(stride).nOut(dOut).hasBias(false)
                        .convolutionMode(mode).dataFormat(df)
                        .build())
                .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nOut(finalNOut).build())
                .setInputType(InputType.convolutional3D(df, depth, height, width, convNIn)).build();

        String json = conf.toJson();
        MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json);
        assertEquals(conf, c2);

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        String msg = "DataFormat = " + df + ", minibatch size = " + miniBatchSize + ", activationFn=" + afn
                + ", kernel = " + Arrays.toString(kernel) + ", stride = "
                + Arrays.toString(stride) + ", mode = " + mode.toString()
                + ", input depth " + depth + ", input height " + height
                + ", input width " + width;

        if (PRINT_RESULTS) {
            log.info(msg);
        }

        boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(input)
                .labels(labels).subset(true).maxPerParam(64));

        assertTrue(msg, gradOK);

        TestUtils.testModelSerialization(net);
    }
}
 
Example 9
Source File: TestSameDiffDense.java    From deeplearning4j with Apache License 2.0
@Test
public void testSameDiffDenseForward() {
    for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) {
        for (int minibatch : new int[]{5, 1}) {
            int nIn = 3;
            int nOut = 4;

            Activation[] afns = new Activation[]{
                    Activation.TANH,
                    Activation.SIGMOID,
                    Activation.ELU,
                    Activation.IDENTITY,
                    Activation.SOFTPLUS,
                    Activation.SOFTSIGN,
                    Activation.CUBE,
                    Activation.HARDTANH,
                    Activation.RELU
            };

            for (Activation a : afns) {
                log.info("Starting test - " + a + ", workspace = " + wsm);
                MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                        .inferenceWorkspaceMode(wsm)
                        .trainingWorkspaceMode(wsm)
                        .list()
                        .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut)
                                .activation(a)
                                .build())
                        .build();

                MultiLayerNetwork net = new MultiLayerNetwork(conf);
                net.init();

                assertNotNull(net.paramTable());

                MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
                        .list()
                        .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build())
                        .build();

                MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
                net2.init();

                net.params().assign(net2.params());

                //Check params:
                assertEquals(net2.params(), net.params());
                Map<String, INDArray> params1 = net.paramTable();
                Map<String, INDArray> params2 = net2.paramTable();
                assertEquals(params2, params1);

                INDArray in = Nd4j.rand(minibatch, nIn);
                INDArray out = net.output(in);
                INDArray outExp = net2.output(in);

                assertEquals(outExp, out);

                //Also check serialization:
                MultiLayerNetwork netLoaded = TestUtils.testModelSerialization(net);
                INDArray outLoaded = netLoaded.output(in);

                assertEquals(outExp, outLoaded);

                //Sanity check on different minibatch sizes:
                INDArray newIn = Nd4j.vstack(in, in);
                INDArray outMbsd = net.output(newIn);
                INDArray outMb = net2.output(newIn);
                assertEquals(outMb, outMbsd);
            }
        }
    }
}
 
Example 10
Source File: TestSameDiffDense.java    From deeplearning4j with Apache License 2.0
@Test
public void testSameDiffDenseForwardMultiLayer() {
    for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) {
        for (int minibatch : new int[]{5, 1}) {
            int nIn = 3;
            int nOut = 4;

            Activation[] afns = new Activation[]{
                    Activation.TANH,
                    Activation.SIGMOID,
                    Activation.ELU,
                    Activation.IDENTITY,
                    Activation.SOFTPLUS,
                    Activation.SOFTSIGN,
                    Activation.CUBE,    //https://github.com/deeplearning4j/nd4j/issues/2426
                    Activation.HARDTANH,
                    Activation.RELU      //JVM crash
            };

            for (Activation a : afns) {
                log.info("Starting test - " + a + " - workspace=" + wsm);
                MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                        .seed(12345)
                        .list()
                        .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut)
                                .weightInit(WeightInit.XAVIER)
                                .activation(a).build())
                        .layer(new SameDiffDense.Builder().nIn(nOut).nOut(nOut)
                                .weightInit(WeightInit.XAVIER)
                                .activation(a).build())
                        .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut)
                                .weightInit(WeightInit.XAVIER)
                                .activation(a).build())
                        .validateOutputLayerConfig(false)
                        .build();

                MultiLayerNetwork net = new MultiLayerNetwork(conf);
                net.init();

                assertNotNull(net.paramTable());

                MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
                        .seed(12345)
                        .weightInit(WeightInit.XAVIER)
                        .list()
                        .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build())
                        .layer(new DenseLayer.Builder().activation(a).nIn(nOut).nOut(nOut).build())
                        .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut)
                                .activation(a).build())
                        .validateOutputLayerConfig(false)
                        .build();

                MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
                net2.init();

                assertEquals(net2.params(), net.params());

                //Check params:
                assertEquals(net2.params(), net.params());
                Map<String, INDArray> params1 = net.paramTable();
                Map<String, INDArray> params2 = net2.paramTable();
                assertEquals(params2, params1);

                INDArray in = Nd4j.rand(minibatch, nIn);
                INDArray out = net.output(in);
                INDArray outExp = net2.output(in);

                assertEquals(outExp, out);

                //Also check serialization:
                MultiLayerNetwork netLoaded = TestUtils.testModelSerialization(net);
                INDArray outLoaded = netLoaded.output(in);

                assertEquals(outExp, outLoaded);


                //Sanity check different minibatch sizes
                in = Nd4j.rand(2 * minibatch, nIn);
                out = net.output(in);
                outExp = net2.output(in);
                assertEquals(outExp, out);
            }
        }
    }
}
 
Example 11
Source File: TestSameDiffDense.java    From deeplearning4j with Apache License 2.0
@Test
    public void testSameDiffDenseBackward() {
        int nIn = 3;
        int nOut = 4;

        for (boolean workspaces : new boolean[]{false, true}) {

            for (int minibatch : new int[]{5, 1}) {

                Activation[] afns = new Activation[]{
                        Activation.TANH,
                        Activation.SIGMOID,
                        Activation.ELU,
                        Activation.IDENTITY,
                        Activation.SOFTPLUS,
                        Activation.SOFTSIGN,
                        Activation.HARDTANH,
                        Activation.CUBE,
                        Activation.RELU
                };

                for (Activation a : afns) {
                    log.info("Starting test - " + a + " - minibatch " + minibatch + ", workspaces: " + workspaces);
                    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                            .trainingWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE)
                            .inferenceWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE)
                            .list()
                            .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut)
                                    .activation(a)
                                    .build())
                            .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX)
                                    .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                            .build();

                    MultiLayerNetwork netSD = new MultiLayerNetwork(conf);
                    netSD.init();

                    MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
                            .list()
                            .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build())
                            .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX)
                                    .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                            .build();

                    MultiLayerNetwork netStandard = new MultiLayerNetwork(conf2);
                    netStandard.init();

                    netSD.params().assign(netStandard.params());

                    //Check params:
                    assertEquals(netStandard.params(), netSD.params());
                    assertEquals(netStandard.paramTable(), netSD.paramTable());

                    INDArray in = Nd4j.rand(minibatch, nIn);
                    INDArray l = TestUtils.randomOneHot(minibatch, nOut, 12345);
                    netSD.setInput(in);
                    netStandard.setInput(in);
                    netSD.setLabels(l);
                    netStandard.setLabels(l);

                    netSD.computeGradientAndScore();
                    netStandard.computeGradientAndScore();

                    Gradient gSD = netSD.gradient();
                    Gradient gStd = netStandard.gradient();

                    Map<String, INDArray> m1 = gSD.gradientForVariable();
                    Map<String, INDArray> m2 = gStd.gradientForVariable();

                    assertEquals(m2.keySet(), m1.keySet());

                    for (String s : m1.keySet()) {
                        INDArray i1 = m1.get(s);
                        INDArray i2 = m2.get(s);

                        assertEquals(s, i2, i1);
                    }

                    assertEquals(gStd.gradient(), gSD.gradient());

                    //Sanity check: different minibatch size
                    in = Nd4j.rand(2 * minibatch, nIn);
                    l = TestUtils.randomOneHot(2 * minibatch, nOut, 12345);
                    netSD.setInput(in);
                    netStandard.setInput(in);
                    netSD.setLabels(l);
                    netStandard.setLabels(l);

                    netSD.computeGradientAndScore();
//                    netStandard.computeGradientAndScore();
//                    assertEquals(netStandard.gradient().gradient(), netSD.gradient().gradient());

                    //Sanity check on different minibatch sizes:
                    INDArray newIn = Nd4j.vstack(in, in);
                    INDArray outMbsd = netSD.output(newIn);
                    INDArray outMb = netStandard.output(newIn);
                    assertEquals(outMb, outMbsd);
                }
            }
        }
    }
 
Example 12
Source File: TestSameDiffDense.java    From deeplearning4j with Apache License 2.0
@Test
public void gradientCheck() {
    int nIn = 4;
    int nOut = 4;

    for (boolean workspaces : new boolean[]{true, false}) {
        for (Activation a : new Activation[]{Activation.TANH, Activation.IDENTITY}) {

            String msg = "workspaces: " + workspaces + ", " + a;
            Nd4j.getRandom().setSeed(12345);

            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .dataType(DataType.DOUBLE)
                    .seed(12345)
                    .updater(new NoOp())
                    .trainingWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE)
                    .inferenceWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE)
                    .list()
                    .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut).activation(a).build())
                    .layer(new SameDiffDense.Builder().nIn(nOut).nOut(nOut).activation(a).build())
                    .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX)
                            .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                    //.setInputType(InputType.feedForward(nIn))     //TODO
                    .build();

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            INDArray f = Nd4j.rand(3, nIn);
            INDArray l = TestUtils.randomOneHot(3, nOut);

            log.info("Starting: " + msg);
            boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, f, l);

            assertTrue(msg, gradOK);

            TestUtils.testModelSerialization(net);

            //Sanity check on different minibatch sizes:
            INDArray newIn = Nd4j.vstack(f, f);
            net.output(newIn);
        }
    }
}
 
Example 13
Source File: TestSameDiffOutput.java    From deeplearning4j with Apache License 2.0
@Test
public void testMSEOutputLayer(){       //Failing 2019/04/17 - https://github.com/deeplearning4j/deeplearning4j/issues/7560
    Nd4j.getRandom().setSeed(12345);

    for(Activation a : new Activation[]{Activation.IDENTITY, Activation.TANH, Activation.SOFTMAX}) {
        log.info("Starting test: " + a);

        MultiLayerConfiguration confSD = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .updater(new Adam(0.01))
                .list()
                .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build())
                .layer(new SameDiffMSEOutputLayer(5, 5, a, WeightInit.XAVIER))
                .build();

        MultiLayerConfiguration confStd = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .updater(new Adam(0.01))
                .list()
                .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build())
                .layer(new OutputLayer.Builder().nIn(5).nOut(5).activation(a).lossFunction(LossFunctions.LossFunction.MSE).build())
                .build();

        MultiLayerNetwork netSD = new MultiLayerNetwork(confSD);
        netSD.init();

        MultiLayerNetwork netStd = new MultiLayerNetwork(confStd);
        netStd.init();

        netSD.params().assign(netStd.params());

        assertEquals(netStd.paramTable(), netSD.paramTable());

        int minibatch = 2;
        INDArray in = Nd4j.rand(minibatch, 5);
        INDArray label = Nd4j.rand(minibatch, 5);

        INDArray outSD = netSD.output(in);
        INDArray outStd = netStd.output(in);
        assertEquals(outStd, outSD);

        DataSet ds = new DataSet(in, label);
        double scoreSD = netSD.score(ds);
        double scoreStd = netStd.score(ds);
        assertEquals(scoreStd, scoreSD, 1e-6);

        netSD.setInput(in);
        netSD.setLabels(label);

        netStd.setInput(in);
        netStd.setLabels(label);

        //System.out.println(((SameDiffOutputLayer) netSD.getLayer(1)).sameDiff.summary());

        netSD.computeGradientAndScore();
        netStd.computeGradientAndScore();

        assertEquals(netStd.getFlattenedGradients(), netSD.getFlattenedGradients());

        for (int i = 0; i < 3; i++) {
            netSD.fit(ds);
            netStd.fit(ds);
            String s = String.valueOf(i);
            assertEquals(s, netStd.params(), netSD.params());
            assertEquals(s, netStd.getFlattenedGradients(), netSD.getFlattenedGradients());
        }

        //Test fit before output:
        MultiLayerNetwork net = new MultiLayerNetwork(confSD.clone());
        net.init();
        net.fit(ds);

        //Sanity check on different minibatch sizes:
        INDArray newIn = Nd4j.vstack(in, in);
        INDArray outMbsd = netSD.output(newIn);
        INDArray outMb = netStd.output(newIn);
        assertEquals(outMb, outMbsd);
    }
}
 
Example 14
Source File: TestReconstructionDistributions.java    From deeplearning4j with Apache License 2.0
@Test
public void testGaussianLogProb() {
    Nd4j.getRandom().setSeed(12345);

    int inputSize = 4;
    int[] mbs = new int[] {1, 2, 5};

    for (boolean average : new boolean[] {true, false}) {
        for (int minibatch : mbs) {

            INDArray x = Nd4j.rand(minibatch, inputSize);
            INDArray mean = Nd4j.randn(minibatch, inputSize);
            INDArray logStdevSquared = Nd4j.rand(minibatch, inputSize).subi(0.5);

            INDArray distributionParams = Nd4j.createUninitialized(new int[] {minibatch, 2 * inputSize});
            distributionParams.get(NDArrayIndex.all(), NDArrayIndex.interval(0, inputSize)).assign(mean);
            distributionParams.get(NDArrayIndex.all(), NDArrayIndex.interval(inputSize, 2 * inputSize))
                            .assign(logStdevSquared);

            ReconstructionDistribution dist = new GaussianReconstructionDistribution(Activation.IDENTITY);

            double negLogProb = dist.negLogProbability(x, distributionParams, average);

            INDArray exampleNegLogProb = dist.exampleNegLogProbability(x, distributionParams);
            assertArrayEquals(new long[] {minibatch, 1}, exampleNegLogProb.shape());

            //Calculate the same thing, but using Apache Commons math

            double logProbSum = 0.0;
            for (int i = 0; i < minibatch; i++) {
                double exampleSum = 0.0;
                for (int j = 0; j < inputSize; j++) {
                    double mu = mean.getDouble(i, j);
                    double logSigma2 = logStdevSquared.getDouble(i, j);
                    double sigma = Math.sqrt(Math.exp(logSigma2));
                    NormalDistribution nd = new NormalDistribution(mu, sigma);

                    double xVal = x.getDouble(i, j);
                    double thisLogProb = nd.logDensity(xVal);
                    logProbSum += thisLogProb;
                    exampleSum += thisLogProb;
                }
                assertEquals(-exampleNegLogProb.getDouble(i), exampleSum, 1e-6);
            }

            double expNegLogProb;
            if (average) {
                expNegLogProb = -logProbSum / minibatch;
            } else {
                expNegLogProb = -logProbSum;
            }


            //                System.out.println(expLogProb + "\t" + logProb + "\t" + (logProb / expLogProb));
            assertEquals(expNegLogProb, negLogProb, 1e-6);


            //Also: check random sampling...
            int count = minibatch * inputSize;
            INDArray arr = Nd4j.linspace(-3, 3, count, Nd4j.dataType()).reshape(minibatch, inputSize);
            INDArray sampleMean = dist.generateAtMean(arr);
            INDArray sampleRandom = dist.generateRandom(arr);
        }
    }
}
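For reference, the per-element quantity that the Apache Commons Math loop above reproduces is the standard Gaussian log-density, with the variance recovered from the second half of the distribution parameters:

log p(x | mu, sigma^2) = -0.5 * log(2 * pi * sigma^2) - (x - mu)^2 / (2 * sigma^2),   where sigma^2 = exp(logStdevSquared)

Summing this over all elements, negating, and (when average is true) dividing by the minibatch size gives the value returned by negLogProbability.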
 
Example 15
Source File: JsonTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testJsonLossFunctions() {

    ILossFunction[] lossFunctions = new ILossFunction[] {new LossBinaryXENT(), new LossBinaryXENT(),
                    new LossCosineProximity(), new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(),
                    new LossL1(), new LossL2(), new LossL2(), new LossMAE(), new LossMAE(), new LossMAPE(),
                    new LossMAPE(), new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(),
                    new LossNegativeLogLikelihood(), new LossNegativeLogLikelihood(), new LossPoisson(),
                    new LossSquaredHinge(), new LossFMeasure(), new LossFMeasure(2.0)};

    Activation[] outputActivationFn = new Activation[] {Activation.SIGMOID, //xent
                    Activation.SIGMOID, //xent
                    Activation.TANH, //cosine
                    Activation.TANH, //hinge -> trying to predict 1 or -1
                    Activation.SIGMOID, //kld -> probab so should be between 0 and 1
                    Activation.SOFTMAX, //kld + softmax
                    Activation.TANH, //l1
                    Activation.SOFTMAX, //l1 + softmax
                    Activation.TANH, //l2
                    Activation.SOFTMAX, //l2 + softmax
                    Activation.IDENTITY, //mae
                    Activation.SOFTMAX, //mae + softmax
                    Activation.IDENTITY, //mape
                    Activation.SOFTMAX, //mape + softmax
                    Activation.SOFTMAX, //mcxent
                    Activation.IDENTITY, //mse
                    Activation.SOFTMAX, //mse + softmax
                    Activation.SIGMOID, //msle  -   requires positive labels/activations due to log
                    Activation.SOFTMAX, //msle + softmax
                    Activation.SIGMOID, //nll
                    Activation.SOFTMAX, //nll + softmax
                    Activation.SIGMOID, //poisson - requires positive predictions due to log... not sure if this is the best option
                    Activation.TANH, //squared hinge
                    Activation.SIGMOID, //f-measure (binary, single sigmoid output)
                    Activation.SOFTMAX //f-measure (binary, 2-label softmax output)
    };

    int[] nOut = new int[] {1, //xent
                    3, //xent
                    5, //cosine
                    3, //hinge
                    3, //kld
                    3, //kld + softmax
                    3, //l1
                    3, //l1 + softmax
                    3, //l2
                    3, //l2 + softmax
                    3, //mae
                    3, //mae + softmax
                    3, //mape
                    3, //mape + softmax
                    3, //mcxent
                    3, //mse
                    3, //mse + softmax
                    3, //msle
                    3, //msle + softmax
                    3, //nll
                    3, //nll + softmax
                    3, //poisson
                    3, //squared hinge
                    1, //f-measure (binary, single sigmoid output)
                    2, //f-measure (binary, 2-label softmax output)
    };

    for (int i = 0; i < lossFunctions.length; i++) {

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(Updater.ADAM).list()
                        .layer(0, new DenseLayer.Builder().nIn(4).nOut(nOut[i]).activation(Activation.TANH).build())
                        .layer(1, new LossLayer.Builder().lossFunction(lossFunctions[i])
                                        .activation(outputActivationFn[i]).build())
                        .validateOutputLayerConfig(false).build();

        String json = conf.toJson();
        String yaml = conf.toYaml();

        MultiLayerConfiguration fromJson = MultiLayerConfiguration.fromJson(json);
        MultiLayerConfiguration fromYaml = MultiLayerConfiguration.fromYaml(yaml);

        assertEquals(conf, fromJson);
        assertEquals(conf, fromYaml);
    }
}
 