Java Code Examples for org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction#MCXENT

The following examples show how to use org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction#MCXENT. The examples are extracted from open source projects; the source file and license are listed above each example.
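Before the project examples, here is a minimal, self-contained sketch of the typical pairing: MCXENT (multi-class cross entropy) on a softmax output layer of a small feed-forward network. The class name, seed, and layer shape (4 inputs, 8 hidden units, 3 classes) are illustrative assumptions, not taken from any of the projects below.

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;

public class McxentSketch {
    public static void main(String[] args) {
        //MCXENT expects one-hot (or probabilistic) labels and is normally paired with SOFTMAX on the output layer
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(12345)                 //hypothetical seed, for reproducibility only
                .weightInit(WeightInit.XAVIER)
                .list()
                .layer(0, new DenseLayer.Builder()
                        .nIn(4).nOut(8)      //hypothetical input/hidden sizes
                        .activation(Activation.RELU)
                        .build())
                .layer(1, new OutputLayer.Builder(LossFunction.MCXENT)
                        .nIn(8).nOut(3)      //3 classes, one-hot labels expected
                        .activation(Activation.SOFTMAX)
                        .build())
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        System.out.println(net.summary());
    }
}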
Example 1
Source File: TestLossFunctionsSizeChecks.java    From nd4j with Apache License 2.0
@Test
public void testL2() {
    LossFunction[] lossFunctionList = {LossFunction.MSE, LossFunction.L1, LossFunction.EXPLL, LossFunction.XENT,
                    LossFunction.MCXENT, LossFunction.SQUARED_LOSS, LossFunction.RECONSTRUCTION_CROSSENTROPY,
                    LossFunction.NEGATIVELOGLIKELIHOOD, LossFunction.COSINE_PROXIMITY, LossFunction.HINGE,
                    LossFunction.SQUARED_HINGE, LossFunction.KL_DIVERGENCE, LossFunction.MEAN_ABSOLUTE_ERROR,
                    LossFunction.L2, LossFunction.MEAN_ABSOLUTE_PERCENTAGE_ERROR,
                    LossFunction.MEAN_SQUARED_LOGARITHMIC_ERROR, LossFunction.POISSON};

    testLossFunctions(lossFunctionList);
}
 
Example 2
Source File: TestLossFunctionsSizeChecks.java    From deeplearning4j with Apache License 2.0
@Test
public void testL2() {
    LossFunction[] lossFunctionList = {LossFunction.MSE, LossFunction.L1, LossFunction.XENT,
                    LossFunction.MCXENT, LossFunction.SQUARED_LOSS, LossFunction.RECONSTRUCTION_CROSSENTROPY,
                    LossFunction.NEGATIVELOGLIKELIHOOD, LossFunction.COSINE_PROXIMITY, LossFunction.HINGE,
                    LossFunction.SQUARED_HINGE, LossFunction.KL_DIVERGENCE, LossFunction.MEAN_ABSOLUTE_ERROR,
                    LossFunction.L2, LossFunction.MEAN_ABSOLUTE_PERCENTAGE_ERROR,
                    LossFunction.MEAN_SQUARED_LOGARITHMIC_ERROR, LossFunction.POISSON};

    testLossFunctions(lossFunctionList);
}
 
Example 3
Source File: Vasttext.java    From scava with Eclipse Public License 2.0
private ComputationGraph VasttextTextualAndNumeric()
{
	Activation activation = null;
	LossFunction loss = null;
	//For multi-label classification, following "Deep Learning with Python", use sigmoid activations with binary cross-entropy
	if(multiLabel)
	{
		activation = Activation.SIGMOID;
		loss = LossFunction.XENT; //Binary Crossentropy
	}
	else
	{
		//We use softmax with multi-class cross entropy for binary classification because the output layer has two neurons.
		//With a single output neuron, the activation would be sigmoid and the loss binary cross-entropy.
		activation = Activation.SOFTMAX;
		loss = LossFunction.MCXENT;	//CATEGORICAL_CROSSENTROPY
	}

	System.err.println("LR:"+lr);
	
	System.err.println("Dense:"+denseDimension);

	ComputationGraphConfiguration  nnConf = new NeuralNetConfiguration.Builder()
			.updater(new Adam(lr))
			.weightInit(WeightInit.XAVIER)
			.trainingWorkspaceMode(WorkspaceMode.ENABLED)
               .inferenceWorkspaceMode(WorkspaceMode.ENABLED)
			.graphBuilder()
			.addInputs("Text", "Extra")
			//Embeddings Parts
			.addLayer("Embeddings", new EmbeddingSequenceLayer.Builder()
                       .nIn(textFeaturesSize)
                       .nOut(denseDimension)
                       .activation(Activation.IDENTITY)
                       //.activation(Activation.TANH)
                       //.dropOut(0.0)
                       .build(), "Text")
			.addLayer("GlobalPooling", new GlobalPoolingLayer.Builder()
                       .poolingType(PoolingType.AVG)
                       .poolingDimensions(2)
                       .collapseDimensions(true)
                       //.dropOut(0.0)
                       .build(), "Embeddings")
			//We're merging directly the values from the extra
			.addVertex("Merge", new MergeVertex(), "GlobalPooling","Extra")
			.addLayer("DenseAll", new DenseLayer.Builder()
					.nIn(denseDimension+numericFeaturesSize)
					.nOut(denseDimension/2)
					//.dropOut(0.5)
					//.l2(0.001)
					.build(), "Merge")
			.addLayer("Output", new OutputLayer.Builder()
					//.dropOut(0.5)
					.nIn(denseDimension/2)
                       .nOut(labelsSize)
                       .activation(activation)
                       .lossFunction(loss)
                       .build(), "DenseAll")
			.setOutputs("Output")
			.pretrain(false)
			.backprop(true)
			.build();

	return new ComputationGraph(nnConf);
}
 
Example 4
Source File: Vasttext.java    From scava with Eclipse Public License 2.0
private MultiLayerNetwork VasttextTextual()
{
	Activation activation = null;
	LossFunction loss = null;
	//For multi-label classification, following "Deep Learning with Python", use sigmoid activations with binary cross-entropy
	if(multiLabel)
	{
		activation = Activation.SIGMOID;
		loss = LossFunction.XENT; //Binary Crossentropy
	}
	else
	{
		//We use softmax with multi-class cross entropy for binary classification because the output layer has two neurons.
		//With a single output neuron, the activation would be sigmoid and the loss binary cross-entropy.
		activation = Activation.SOFTMAX;
		loss = LossFunction.MCXENT;	//CATEGORICAL_CROSSENTROPY
	}

	MultiLayerConfiguration nnConf = new NeuralNetConfiguration.Builder()
               .updater(new Adam(lr))
               .weightInit(WeightInit.XAVIER)
               .trainingWorkspaceMode(WorkspaceMode.ENABLED)
               .inferenceWorkspaceMode(WorkspaceMode.ENABLED)
               .list()
               .layer(0, new EmbeddingSequenceLayer.Builder()
                       .nIn(textFeaturesSize)
                       .nOut(denseDimension)
                       .activation(Activation.IDENTITY)
                       .build())
               .layer(1, new GlobalPoolingLayer.Builder()
                       .poolingType(PoolingType.AVG)
                       .poolingDimensions(2)
                       .collapseDimensions(true)
                       .build())
               .layer(2, new OutputLayer.Builder()
                       .nIn(denseDimension)
                       .nOut(labelsSize)
                       .activation(activation)
                       .lossFunction(loss)
                       .build())
               .pretrain(false).backprop(true).build();

       return new MultiLayerNetwork(nnConf);
}
 
Example 5
Source File: GradientCheckTests.java    From deeplearning4j with Apache License 2.0
@Test
    public void testAutoEncoder() {
        //As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied
        //Need to run gradient through updater, so that L2 can be applied

        Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
        boolean[] characteristic = {false, true}; //If true: run some backprop steps first

        LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH};

        DataNormalization scaler = new NormalizerMinMaxScaler();
        DataSetIterator iter = new IrisDataSetIterator(150, 150);
        scaler.fit(iter);
        iter.setPreProcessor(scaler);
        DataSet ds = iter.next();
        INDArray input = ds.getFeatures();
        INDArray labels = ds.getLabels();

        NormalizerStandardize norm = new NormalizerStandardize();
        norm.fit(ds);
        norm.transform(ds);

        double[] l2vals = {0.2, 0.0, 0.2};
        double[] l1vals = {0.0, 0.3, 0.3}; //i.e., use l2vals[i] with l1vals[i]

        for (Activation afn : activFns) {
            for (boolean doLearningFirst : characteristic) {
                for (int i = 0; i < lossFunctions.length; i++) {
                    for (int k = 0; k < l2vals.length; k++) {
                        LossFunction lf = lossFunctions[i];
                        Activation outputActivation = outputActivations[i];
                        double l2 = l2vals[k];
                        double l1 = l1vals[k];

                        Nd4j.getRandom().setSeed(12345);
                        MultiLayerConfiguration conf =
                                        new NeuralNetConfiguration.Builder()
                                                        .dataType(DataType.DOUBLE)
                                                        .updater(new NoOp())
                                                        .l2(l2).l1(l1)
                                                        .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                                                        .seed(12345L)
                                                        .dist(new NormalDistribution(0, 1))
                                                        .list().layer(0,
                                                                        new AutoEncoder.Builder().nIn(4).nOut(3)
                                                                                        .activation(afn).build())
                                                        .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3)
                                                                        .activation(outputActivation).build())
                                                        .build();

                        MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                        mln.init();

                        String msg;
                        if (doLearningFirst) {
                            //Run a number of iterations of learning
                            mln.setInput(ds.getFeatures());
                            mln.setLabels(ds.getLabels());
                            mln.computeGradientAndScore();
                            double scoreBefore = mln.score();
                            for (int j = 0; j < 10; j++)
                                mln.fit(ds);
                            mln.computeGradientAndScore();
                            double scoreAfter = mln.score();
                            //Can't test in 'characteristic mode of operation' if not learning
                            msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn="
                                            + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                            + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1
                                            + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
                            assertTrue(msg, scoreAfter < scoreBefore);
                        }

                        msg = "testGradMLP2LayerIrisSimple() - activationFn=" + afn + ", lossFn=" + lf
                                        + ", outputActivation=" + outputActivation + ", doLearningFirst="
                                        + doLearningFirst + ", l2=" + l2 + ", l1=" + l1;
                        if (PRINT_RESULTS) {
                            System.out.println(msg);
//                            for (int j = 0; j < mln.getnLayers(); j++)
//                                System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                        }

                        boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                        assertTrue(msg, gradOK);
                        TestUtils.testModelSerialization(mln);
                    }
                }
            }
        }
    }
 
Example 6
Source File: GradientCheckTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testGradientWeightDecay() {

    Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.THRESHOLDEDRELU};
    boolean[] characteristic = {false, true}; //If true: run some backprop steps first

    LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
    Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here

    DataNormalization scaler = new NormalizerMinMaxScaler();
    DataSetIterator iter = new IrisDataSetIterator(150, 150);
    scaler.fit(iter);
    iter.setPreProcessor(scaler);
    DataSet ds = iter.next();

    INDArray input = ds.getFeatures();
    INDArray labels = ds.getLabels();

    //use l2vals[i] with l1vals[i]
    double[] l2vals = {0.4, 0.0, 0.4, 0.4, 0.0, 0.0};
    double[] l1vals = {0.0, 0.0, 0.5, 0.0, 0.5, 0.0};
    double[] biasL2 = {0.0, 0.0, 0.0, 0.2, 0.0, 0.0};
    double[] biasL1 = {0.0, 0.0, 0.6, 0.0, 0.0, 0.5};
    double[] wdVals = {0.0, 0.0, 0.0, 0.0, 0.4, 0.0};
    double[] wdBias = {0.0, 0.0, 0.0, 0.0, 0.0, 0.4};

    for (Activation afn : activFns) {
        for (int i = 0; i < lossFunctions.length; i++) {
            for (int k = 0; k < l2vals.length; k++) {
                LossFunction lf = lossFunctions[i];
                Activation outputActivation = outputActivations[i];
                double l2 = l2vals[k];
                double l1 = l1vals[k];

                MultiLayerConfiguration conf =
                        new NeuralNetConfiguration.Builder().l2(l2).l1(l1)
                                .dataType(DataType.DOUBLE)
                                .l2Bias(biasL2[k]).l1Bias(biasL1[k])
                                .weightDecay(wdVals[k]).weightDecayBias(wdBias[k])
                                .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                                .seed(12345L)
                                .list().layer(0,
                                new DenseLayer.Builder().nIn(4).nOut(3)
                                        .dist(new NormalDistribution(0,
                                                1))
                                        .updater(new NoOp())
                                        .activation(afn).build())
                                .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3)
                                        .dist(new NormalDistribution(0, 1))
                                        .updater(new NoOp())
                                        .activation(outputActivation).build())
                                .build();

                MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                mln.init();

                boolean gradOK1 = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                String msg = "testGradientWeightDecay() - activationFn=" + afn + ", lossFn=" + lf
                        + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1;
                assertTrue(msg, gradOK1);

                TestUtils.testModelSerialization(mln);
            }
        }
    }
}
 
Example 7
Source File: LSTMGradientCheckTests.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientLSTMFull() {

        int timeSeriesLength = 4;
        int nIn = 3;
        int layerSize = 4;
        int nOut = 2;
        int miniBatchSize = 2;

        boolean[] gravesLSTM = new boolean[] {true, false};

        for (boolean graves : gravesLSTM) {

            Random r = new Random(12345L);
            INDArray input = Nd4j.rand(new int[]{miniBatchSize, nIn, timeSeriesLength}, 'f').subi(0.5);

            INDArray labels = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength);
            for (int i = 0; i < miniBatchSize; i++) {
                for (int j = 0; j < timeSeriesLength; j++) {
                    int idx = r.nextInt(nOut);
                    labels.putScalar(new int[] {i, idx, j}, 1.0f);
                }
            }


            //use l2vals[i] with l1vals[i]
            double[] l2vals = {0.4, 0.0};
            double[] l1vals = {0.0, 0.5};
            double[] biasL2 = {0.3, 0.0};
            double[] biasL1 = {0.0, 0.6};
            Activation[] activFns = {Activation.TANH, Activation.SOFTSIGN};
            LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
            Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH};

            for (int i = 0; i < l2vals.length; i++) {

                LossFunction lf = lossFunctions[i];
                Activation outputActivation = outputActivations[i];
                double l2 = l2vals[i];
                double l1 = l1vals[i];
                Activation afn = activFns[i];

                NeuralNetConfiguration.Builder conf =
                        new NeuralNetConfiguration.Builder()
                                .dataType(DataType.DOUBLE)
                                .seed(12345L)
                                .dist(new NormalDistribution(0, 1)).updater(new NoOp());

                if (l1 > 0.0)
                    conf.l1(l1);
                if (l2 > 0.0)
                    conf.l2(l2);
                if (biasL2[i] > 0)
                    conf.l2Bias(biasL2[i]);
                if (biasL1[i] > 0)
                    conf.l1Bias(biasL1[i]);

                Layer layer;
                if (graves) {
                    layer = new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(afn).build();
                } else {
                    layer = new LSTM.Builder().nIn(nIn).nOut(layerSize).activation(afn).build();
                }

                NeuralNetConfiguration.ListBuilder conf2 = conf.list().layer(0, layer)
                        .layer(1, new RnnOutputLayer.Builder(lf).activation(outputActivation)
                                .nIn(layerSize).nOut(nOut).build())
                        ;

                MultiLayerNetwork mln = new MultiLayerNetwork(conf2.build());
                mln.init();

                String testName = "testGradientLSTMFull(" + (graves ? "GravesLSTM" : "LSTM")
                        + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation="
                        + outputActivation + ", l2=" + l2 + ", l1=" + l1;
                if (PRINT_RESULTS) {
                    System.out.println(testName);
//                    for (int j = 0; j < mln.getnLayers(); j++)
//                        System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                }

                boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(mln).input(input)
                        .labels(labels).subset(true).maxPerParam(128));

                assertTrue(testName, gradOK);
                TestUtils.testModelSerialization(mln);
            }
        }
    }
 
Example 8
Source File: LSTMGradientCheckTests.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientGravesBidirectionalLSTMFull() {
        Activation[] activFns = {Activation.TANH, Activation.SOFTSIGN};

        LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here

        int timeSeriesLength = 3;
        int nIn = 2;
        int layerSize = 2;
        int nOut = 2;
        int miniBatchSize = 3;

        Random r = new Random(12345L);
        INDArray input = Nd4j.rand(DataType.DOUBLE, miniBatchSize, nIn, timeSeriesLength).subi(0.5);

        INDArray labels = TestUtils.randomOneHotTimeSeries(miniBatchSize, nOut, timeSeriesLength);

        //use l2vals[i] with l1vals[i]
        double[] l2vals = {0.4, 0.0};
        double[] l1vals = {0.5, 0.0};
        double[] biasL2 = {0.0, 0.2};
        double[] biasL1 = {0.0, 0.6};

        for (int i = 0; i < lossFunctions.length; i++) {
            for (int k = 0; k < l2vals.length; k++) {
                Activation afn = activFns[i];
                LossFunction lf = lossFunctions[i];
                Activation outputActivation = outputActivations[i];
                double l2 = l2vals[k];
                double l1 = l1vals[k];

                NeuralNetConfiguration.Builder conf =
                        new NeuralNetConfiguration.Builder();
                if (l1 > 0.0)
                    conf.l1(l1);
                if (l2 > 0.0)
                    conf.l2(l2);
                if (biasL2[k] > 0)
                    conf.l2Bias(biasL2[k]);
                if (biasL1[k] > 0)
                    conf.l1Bias(biasL1[k]);

                MultiLayerConfiguration mlc = conf.seed(12345L)
                        .dataType(DataType.DOUBLE)
                        .updater(new NoOp())
                        .list().layer(0,
                                new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize)
                                        .weightInit(new NormalDistribution(0, 1))
                                        .activation(afn)
                                        .build())
                        .layer(1, new RnnOutputLayer.Builder(lf).activation(outputActivation).nIn(layerSize)
                                .nOut(nOut)
                                .dist(new NormalDistribution(0, 1)).updater(new NoOp()).build())
                        .build();


                MultiLayerNetwork mln = new MultiLayerNetwork(mlc);

                mln.init();

                if (PRINT_RESULTS) {
                    System.out.println("testGradientGravesBidirectionalLSTMFull() - activationFn=" + afn
                            + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2
                            + ", l1=" + l1);
//                    for (int j = 0; j < mln.getnLayers(); j++)
//                        System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                }

                boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                String msg = "testGradientGravesLSTMFull() - activationFn=" + afn + ", lossFn=" + lf
                        + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1;
                assertTrue(msg, gradOK);
                TestUtils.testModelSerialization(mln);
            }
        }
    }
 
Example 9
Source File: VaeGradientCheckTests.java    From deeplearning4j with Apache License 2.0
@Test
    public void testVaeAsMLP() {
        //Post pre-training: a VAE can be used as a MLP, by taking the mean value from p(z|x) as the output
        //This gradient check tests this part

        Activation[] activFns = {Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH};

        LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MCXENT, LossFunction.MSE, LossFunction.MSE, LossFunction.MCXENT, LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.SOFTMAX, Activation.TANH, Activation.TANH, Activation.SOFTMAX, Activation.TANH};

        //use l2vals[i] with l1vals[i]
        double[] l2vals = {0.4, 0.0, 0.4, 0.4, 0.0, 0.0};
        double[] l1vals = {0.0, 0.0, 0.5, 0.0, 0.0, 0.5};
        double[] biasL2 = {0.0, 0.0, 0.0, 0.2, 0.0, 0.4};
        double[] biasL1 = {0.0, 0.0, 0.6, 0.0, 0.0, 0.0};

        int[][] encoderLayerSizes = new int[][] {{5}, {5}, {5, 6}, {5, 6}, {5}, {5, 6}};
        int[][] decoderLayerSizes = new int[][] {{6}, {7, 8}, {6}, {7, 8}, {6}, {7, 8}};

        int[] minibatches = new int[]{1,5,4,3,1,4};

        Nd4j.getRandom().setSeed(12345);
        for( int i=0; i<activFns.length; i++ ){
            LossFunction lf = lossFunctions[i];
            Activation outputActivation = outputActivations[i];
            double l2 = l2vals[i];
            double l1 = l1vals[i];
            int[] encoderSizes = encoderLayerSizes[i];
            int[] decoderSizes = decoderLayerSizes[i];
            int minibatch = minibatches[i];
            INDArray input = Nd4j.rand(minibatch, 4);
            INDArray labels = Nd4j.create(minibatch, 3);
            for (int j = 0; j < minibatch; j++) {
                labels.putScalar(j, j % 3, 1.0);
            }
            Activation afn = activFns[i];

            MultiLayerConfiguration conf =
                    new NeuralNetConfiguration.Builder().l2(l2).l1(l1)
                            .dataType(DataType.DOUBLE)
                            .updater(new NoOp())
                            .l2Bias(biasL2[i]).l1Bias(biasL1[i])
                            .updater(new NoOp()).seed(12345L).list()
                            .layer(0, new VariationalAutoencoder.Builder().nIn(4)
                                    .nOut(3).encoderLayerSizes(encoderSizes)
                                    .decoderLayerSizes(decoderSizes)

                                    .dist(new NormalDistribution(0, 1))
                                    .activation(afn)
                                    .build())
                            .layer(1, new OutputLayer.Builder(lf)
                                    .activation(outputActivation).nIn(3).nOut(3)

                                    .dist(new NormalDistribution(0, 1))
                                    .build())
                            .build();

            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
            mln.init();

            String msg = "testVaeAsMLP() - activationFn=" + afn + ", lossFn=" + lf
                    + ", outputActivation=" + outputActivation + ", encLayerSizes = "
                    + Arrays.toString(encoderSizes) + ", decLayerSizes = "
                    + Arrays.toString(decoderSizes) + ", l2=" + l2 + ", l1=" + l1;
            if (PRINT_RESULTS) {
                System.out.println(msg);
//                for (int j = 0; j < mln.getnLayers(); j++)
//                    System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
            }

            boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input,
                    labels);
            assertTrue(msg, gradOK);
            TestUtils.testModelSerialization(mln);
        }
    }