Java Code Examples for org.nd4j.linalg.activations.Activation#SOFTMAX

The following examples show how to use org.nd4j.linalg.activations.Activation#SOFTMAX. The source file and project for each example are noted above the code.
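
As a quick orientation before the examples, here is a minimal sketch of the most common pattern in which Activation.SOFTMAX appears: as the output activation of a multi-class classifier, paired with a multi-class cross-entropy loss. The layer sizes and the MCXENT loss below are illustrative assumptions, not taken from any particular example on this page.

// Minimal sketch (assumed sizes and loss function): a 3-class softmax output layer in DL4J.
// Relevant imports: org.deeplearning4j.nn.conf.*, org.deeplearning4j.nn.conf.layers.*,
// org.deeplearning4j.nn.multilayer.MultiLayerNetwork, org.nd4j.linalg.activations.Activation,
// org.nd4j.linalg.lossfunctions.LossFunctions
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .seed(12345)
        .list()
        .layer(new DenseLayer.Builder().nIn(4).nOut(10).activation(Activation.TANH).build())
        .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                .activation(Activation.SOFTMAX) //softmax: class probabilities sum to 1
                .nIn(10).nOut(3)
                .build())
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
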
Example 1
Source File: KerasActivationUtils.java    From deeplearning4j with Apache License 2.0
/**
 * Map Keras to DL4J activation functions.
 *
 * @param conf Keras layer configuration
 * @param kerasActivation String containing Keras activation function name
 * @return Activation enum value containing DL4J activation function name
 */
public static Activation mapToActivation(String kerasActivation, KerasLayerConfiguration conf)
        throws UnsupportedKerasConfigurationException {
    Activation dl4jActivation;
    if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SOFTMAX())) {
        dl4jActivation = Activation.SOFTMAX;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SOFTPLUS())) {
        dl4jActivation = Activation.SOFTPLUS;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SOFTSIGN())) {
        dl4jActivation = Activation.SOFTSIGN;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_RELU())) {
        dl4jActivation = Activation.RELU;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_RELU6())) {
        dl4jActivation = Activation.RELU6;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_ELU())) {
        dl4jActivation = Activation.ELU;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SELU())) {
        dl4jActivation = Activation.SELU;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_TANH())) {
        dl4jActivation = Activation.TANH;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SIGMOID())) {
        dl4jActivation = Activation.SIGMOID;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_HARD_SIGMOID())) {
        dl4jActivation = Activation.HARDSIGMOID;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_LINEAR())) {
        dl4jActivation = Activation.IDENTITY;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SWISH())) {
        dl4jActivation = Activation.SWISH;
    } else {
        throw new UnsupportedKerasConfigurationException(
                "Unknown Keras activation function " + kerasActivation);
    }
    return dl4jActivation;
}
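
A call to this utility could look like the following sketch. The Keras2LayerConfiguration instance and the literal name "softmax" are illustrative assumptions, not part of the original source file.

// Hypothetical usage sketch (assumed configuration class and activation name):
KerasLayerConfiguration conf = new Keras2LayerConfiguration();
try {
    Activation act = KerasActivationUtils.mapToActivation("softmax", conf);
    // act is expected to be Activation.SOFTMAX
} catch (UnsupportedKerasConfigurationException e) {
    // thrown when a Keras activation name has no DL4J equivalent
    e.printStackTrace();
}
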
 
Example 2
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientCNNMLN() {
        if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format...
            return;

        //Parameterized test, testing combinations of:
        // (a) activation function
        // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
        // (c) Loss function (with specified output activations)
        Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
        boolean[] characteristic = {false, true}; //If true: run some backprop steps first

        LossFunctions.LossFunction[] lossFunctions =
                {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here

        DataSet ds = new IrisDataSetIterator(150, 150).next();
        ds.normalizeZeroMeanZeroUnitVariance();
        INDArray input = ds.getFeatures();
        INDArray labels = ds.getLabels();

        for (Activation afn : activFns) {
            for (boolean doLearningFirst : characteristic) {
                for (int i = 0; i < lossFunctions.length; i++) {
                    LossFunctions.LossFunction lf = lossFunctions[i];
                    Activation outputActivation = outputActivations[i];

                    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                            .dataType(DataType.DOUBLE)
                            .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp())
                            .weightInit(WeightInit.XAVIER).seed(12345L).list()
                            .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn).build())
                            .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build())
                            .setInputType(InputType.convolutionalFlat(1, 4, 1));

                    MultiLayerConfiguration conf = builder.build();

                    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                    mln.init();
                    String name = new Object() {}.getClass().getEnclosingMethod().getName();

                    if (doLearningFirst) {
                        //Run a number of iterations of learning
                        mln.setInput(ds.getFeatures());
                        mln.setLabels(ds.getLabels());
                        mln.computeGradientAndScore();
                        double scoreBefore = mln.score();
                        for (int j = 0; j < 10; j++)
                            mln.fit(ds);
                        mln.computeGradientAndScore();
                        double scoreAfter = mln.score();
                        //Can't test in 'characteristic mode of operation' if not learning
                        String msg = name + " - score did not (sufficiently) decrease during learning - activationFn="
                                + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
                                + ", scoreAfter=" + scoreAfter + ")";
                        assertTrue(msg, scoreAfter < 0.9 * scoreBefore);
                    }

                    if (PRINT_RESULTS) {
                        System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation="
                                + outputActivation + ", doLearningFirst=" + doLearningFirst);
//                        for (int j = 0; j < mln.getnLayers(); j++)
//                            System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                    }

                    boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                            DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                    assertTrue(gradOK);
                    TestUtils.testModelSerialization(mln);
                }
            }
        }
    }
 
Example 3
Source File: TestScoreFunctions.java    From deeplearning4j with Apache License 2.0
@Test
public void testROCScoreFunctions() throws Exception {


    for (boolean auc : new boolean[]{true, false}) {
        for (ROCScoreFunction.ROCType rocType : ROCScoreFunction.ROCType.values()) {
            String msg = (auc ? "AUC" : "AUPRC") + " - " + rocType;
            log.info("Starting: " + msg);

            ParameterSpace<Double> lr = new ContinuousParameterSpace(1e-5, 1e-3);

            int nOut = (rocType == ROCScoreFunction.ROCType.ROC ? 2 : 10);
            LossFunctions.LossFunction lf = (rocType == ROCScoreFunction.ROCType.BINARY ?
                    LossFunctions.LossFunction.XENT : LossFunctions.LossFunction.MCXENT);
            Activation a = (rocType == ROCScoreFunction.ROCType.BINARY ? Activation.SIGMOID : Activation.SOFTMAX);
            MultiLayerSpace mls = new MultiLayerSpace.Builder()
                    .trainingWorkspaceMode(WorkspaceMode.NONE)
                    .inferenceWorkspaceMode(WorkspaceMode.NONE)
                    .updater(new AdamSpace(lr))
                    .weightInit(WeightInit.XAVIER)
                    .layer(new OutputLayerSpace.Builder().nIn(784).nOut(nOut)
                            .activation(a)
                            .lossFunction(lf).build())
                    .build();

            CandidateGenerator cg = new RandomSearchGenerator(mls);
            ResultSaver rs = new InMemoryResultSaver();
            ScoreFunction sf = new ROCScoreFunction(rocType, (auc ? ROCScoreFunction.Metric.AUC : ROCScoreFunction.Metric.AUPRC));


            OptimizationConfiguration oc = new OptimizationConfiguration.Builder()
                    .candidateGenerator(cg)
                    .dataProvider(new DP(rocType))
                    .modelSaver(rs)
                    .scoreFunction(sf)
                    .terminationConditions(new MaxCandidatesCondition(3))
                    .rngSeed(12345)
                    .build();

            IOptimizationRunner runner = new LocalOptimizationRunner(oc, new MultiLayerNetworkTaskCreator());
            runner.execute();

            List<ResultReference> list = runner.getResults();

            for (ResultReference rr : list) {
                DataSetIterator testIter = new MnistDataSetIterator(4, 16, false, false, false, 12345);
                testIter.setPreProcessor(new PreProc(rocType));

                OptimizationResult or = rr.getResult();
                MultiLayerNetwork net = (MultiLayerNetwork) or.getResultReference().getResultModel();

                double expScore;
                switch (rocType){
                    case ROC:
                        if(auc){
                            expScore = net.doEvaluation(testIter, new ROC())[0].calculateAUC();
                        } else {
                            expScore = net.doEvaluation(testIter, new ROC())[0].calculateAUCPR();
                        }
                        break;
                    case BINARY:
                        if(auc){
                            expScore = net.doEvaluation(testIter, new ROCBinary())[0].calculateAverageAuc();
                        } else {
                            expScore = net.doEvaluation(testIter, new ROCBinary())[0].calculateAverageAUCPR();
                        }
                        break;
                    case MULTICLASS:
                        if(auc){
                            expScore = net.doEvaluation(testIter, new ROCMultiClass())[0].calculateAverageAUC();
                        } else {
                            expScore = net.doEvaluation(testIter, new ROCMultiClass())[0].calculateAverageAUCPR();
                        }
                        break;
                    default:
                        throw new RuntimeException();
                }


                DataSetIterator iter = new MnistDataSetIterator(4, 16, false, false, false, 12345);
                iter.setPreProcessor(new PreProc(rocType));

                assertEquals(msg, expScore, or.getScore(), 1e-4);
            }
        }
    }
}
 
Example 4
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientCNNL1L2MLN() {
        //Parameterized test, testing combinations of:
        // (a) activation function
        // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
        // (c) Loss function (with specified output activations)

        DataSet ds = new IrisDataSetIterator(150, 150).next();
        ds.normalizeZeroMeanZeroUnitVariance();
        INDArray input = ds.getFeatures();
        INDArray labels = ds.getLabels();

        //use l2vals[i] with l1vals[i]
        double[] l2vals = {0.4, 0.0, 0.4, 0.4};
        double[] l1vals = {0.0, 0.0, 0.5, 0.0};
        double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
        double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
        Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS};
        boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first

        LossFunctions.LossFunction[] lossFunctions =
                {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here

        for( int i=0; i<l2vals.length; i++ ){
            Activation afn = activFns[i];
            boolean doLearningFirst = characteristic[i];
            LossFunctions.LossFunction lf = lossFunctions[i];
            Activation outputActivation = outputActivations[i];
            double l2 = l2vals[i];
            double l1 = l1vals[i];

            MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                    .dataType(DataType.DOUBLE)
                    .l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i])
                    .optimizationAlgo(
                            OptimizationAlgorithm.CONJUGATE_GRADIENT)
                    .seed(12345L).list()
                    .layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6)
                            .weightInit(WeightInit.XAVIER).activation(afn)
                            .updater(new NoOp()).build())
                    .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
                            .weightInit(WeightInit.XAVIER).updater(new NoOp()).build())

                    .setInputType(InputType.convolutionalFlat(1, 4, 1));

            MultiLayerConfiguration conf = builder.build();

            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
            mln.init();
            String testName = new Object() {}.getClass().getEnclosingMethod().getName();

            if (doLearningFirst) {
                //Run a number of iterations of learning
                mln.setInput(ds.getFeatures());
                mln.setLabels(ds.getLabels());
                mln.computeGradientAndScore();
                double scoreBefore = mln.score();
                for (int j = 0; j < 10; j++)
                    mln.fit(ds);
                mln.computeGradientAndScore();
                double scoreAfter = mln.score();
                //Can't test in 'characteristic mode of operation' if not learning
                String msg = testName
                        + "- score did not (sufficiently) decrease during learning - activationFn="
                        + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                        + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
                        + ", scoreAfter=" + scoreAfter + ")";
                assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
            }

            if (PRINT_RESULTS) {
                System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf
                        + ", outputActivation=" + outputActivation + ", doLearningFirst="
                        + doLearningFirst);
//                for (int j = 0; j < mln.getnLayers(); j++)
//                    System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
            }

            boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

            assertTrue(gradOK);
            TestUtils.testModelSerialization(mln);
        }
    }
 
Example 5
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testGradientCNNMLN() {
    //Parameterized test, testing combinations of:
    // (a) activation function
    // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
    // (c) Loss function (with specified output activations)
    Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
    boolean[] characteristic = {false, true}; //If true: run some backprop steps first

    LossFunctions.LossFunction[] lossFunctions =
            {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
    Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here

    DataSet ds = new IrisDataSetIterator(150, 150).next();
    ds.normalizeZeroMeanZeroUnitVariance();
    INDArray input = ds.getFeatures();
    INDArray labels = ds.getLabels();

    for (Activation afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                LossFunctions.LossFunction lf = lossFunctions[i];
                Activation outputActivation = outputActivations[i];

                MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                        .dataType(DataType.DOUBLE)
                        .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp())
                        .weightInit(WeightInit.XAVIER).seed(12345L).list()
                        .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn)
                                .cudnnAllowFallback(false)
                                .build())
                        .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build())
                        .setInputType(InputType.convolutionalFlat(1, 4, 1));

                MultiLayerConfiguration conf = builder.build();

                MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                mln.init();
                String name = new Object() {}.getClass().getEnclosingMethod().getName();

                if (doLearningFirst) {
                    //Run a number of iterations of learning
                    mln.setInput(ds.getFeatures());
                    mln.setLabels(ds.getLabels());
                    mln.computeGradientAndScore();
                    double scoreBefore = mln.score();
                    for (int j = 0; j < 10; j++)
                        mln.fit(ds);
                    mln.computeGradientAndScore();
                    double scoreAfter = mln.score();
                    //Can't test in 'characteristic mode of operation' if not learning
                    String msg = name + " - score did not (sufficiently) decrease during learning - activationFn="
                            + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                            + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
                            + ", scoreAfter=" + scoreAfter + ")";
                    assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
                }

                if (PRINT_RESULTS) {
                    System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation="
                            + outputActivation + ", doLearningFirst=" + doLearningFirst);
                }

                boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                assertTrue(gradOK);
                TestUtils.testModelSerialization(mln);
            }
        }
    }
}
 
Example 6
Source File: ComputationGraphConfigurationTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testInvalidOutputLayer(){
    /*
    Test case (invalid configs)
    1. nOut=1 + softmax
    2. mcxent + tanh
    3. xent + softmax
    4. xent + relu
    5. mcxent + sigmoid
     */

    LossFunctions.LossFunction[] lf = new LossFunctions.LossFunction[]{
            LossFunctions.LossFunction.MCXENT, LossFunctions.LossFunction.MCXENT, LossFunctions.LossFunction.XENT,
            LossFunctions.LossFunction.XENT, LossFunctions.LossFunction.MCXENT};
    int[] nOut = new int[]{1, 3, 3, 3, 3};
    Activation[] activations = new Activation[]{Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.RELU, Activation.SIGMOID};
    for( int i=0; i<lf.length; i++ ){
        for(boolean lossLayer : new boolean[]{false, true}) {
            for (boolean validate : new boolean[]{true, false}) {
                String s = "nOut=" + nOut[i] + ",lossFn=" + lf[i] + ",lossLayer=" + lossLayer + ",validate=" + validate;
                if(nOut[i] == 1 && lossLayer)
                    continue;   //nOut is not available in a loss layer, so it can't be expected to detect this case
                try {
                    new NeuralNetConfiguration.Builder()
                            .graphBuilder()
                            .addInputs("in")
                            .layer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in")
                            .layer("1",
                                    !lossLayer ? new OutputLayer.Builder().nIn(10).nOut(nOut[i]).activation(activations[i]).lossFunction(lf[i]).build()
                                            : new LossLayer.Builder().activation(activations[i]).lossFunction(lf[i]).build(), "0")
                            .setOutputs("1")
                            .validateOutputLayerConfig(validate)
                            .build();
                    if (validate) {
                        fail("Expected exception: " + s);
                    }
                } catch (DL4JInvalidConfigException e) {
                    if (validate) {
                        assertTrue(s, e.getMessage().toLowerCase().contains("invalid output"));
                    } else {
                        fail("Validation should not be enabled");
                    }
                }
            }
        }
    }
}
 
Example 7
Source File: MultiLayerNeuralNetConfigurationTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testInvalidOutputLayer(){
    /*
    Test case (invalid configs)
    1. nOut=1 + softmax
    2. mcxent + tanh
    3. xent + softmax
    4. xent + relu
    5. mcxent + sigmoid
     */

    LossFunctions.LossFunction[] lf = new LossFunctions.LossFunction[]{
            LossFunctions.LossFunction.MCXENT, LossFunctions.LossFunction.MCXENT, LossFunctions.LossFunction.XENT,
            LossFunctions.LossFunction.XENT, LossFunctions.LossFunction.MCXENT};
    int[] nOut = new int[]{1, 3, 3, 3, 3};
    Activation[] activations = new Activation[]{Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.RELU, Activation.SIGMOID};
    for( int i=0; i<lf.length; i++ ){
        for(boolean lossLayer : new boolean[]{false, true}) {
            for (boolean validate : new boolean[]{true, false}) {
                String s = "nOut=" + nOut[i] + ",lossFn=" + lf[i] + ",lossLayer=" + lossLayer + ",validate=" + validate;
                if(nOut[i] == 1 && lossLayer)
                    continue;   //nOut is not available in a loss layer, so it can't be expected to detect this case
                try {
                    new NeuralNetConfiguration.Builder()
                            .list()
                            .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
                            .layer(!lossLayer ? new OutputLayer.Builder().nIn(10).nOut(nOut[i]).activation(activations[i]).lossFunction(lf[i]).build()
                                            : new LossLayer.Builder().activation(activations[i]).lossFunction(lf[i]).build())
                            .validateOutputLayerConfig(validate)
                            .build();
                    if (validate) {
                        fail("Expected exception: " + s);
                    }
                } catch (DL4JInvalidConfigException e) {
                    if (validate) {
                        assertTrue(s, e.getMessage().toLowerCase().contains("invalid output"));
                    } else {
                        fail("Validation should not be enabled");
                    }
                }
            }
        }
    }
}
 
Example 8
Source File: JsonTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testJsonLossFunctions() {

    ILossFunction[] lossFunctions = new ILossFunction[] {new LossBinaryXENT(), new LossBinaryXENT(),
                    new LossCosineProximity(), new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(),
                    new LossL1(), new LossL2(), new LossL2(), new LossMAE(), new LossMAE(), new LossMAPE(),
                    new LossMAPE(), new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(),
                    new LossNegativeLogLikelihood(), new LossNegativeLogLikelihood(), new LossPoisson(),
                    new LossSquaredHinge(), new LossFMeasure(), new LossFMeasure(2.0)};

    Activation[] outputActivationFn = new Activation[] {Activation.SIGMOID, //xent
                    Activation.SIGMOID, //xent
                    Activation.TANH, //cosine
                    Activation.TANH, //hinge -> trying to predict 1 or -1
                    Activation.SIGMOID, //kld -> probab so should be between 0 and 1
                    Activation.SOFTMAX, //kld + softmax
                    Activation.TANH, //l1
                    Activation.SOFTMAX, //l1 + softmax
                    Activation.TANH, //l2
                    Activation.SOFTMAX, //l2 + softmax
                    Activation.IDENTITY, //mae
                    Activation.SOFTMAX, //mae + softmax
                    Activation.IDENTITY, //mape
                    Activation.SOFTMAX, //mape + softmax
                    Activation.SOFTMAX, //mcxent
                    Activation.IDENTITY, //mse
                    Activation.SOFTMAX, //mse + softmax
                    Activation.SIGMOID, //msle  -   requires positive labels/activations due to log
                    Activation.SOFTMAX, //msle + softmax
                    Activation.SIGMOID, //nll
                    Activation.SOFTMAX, //nll + softmax
                    Activation.SIGMOID, //poisson - requires positive predictions due to log... not sure if this is the best option
                    Activation.TANH, //squared hinge
                    Activation.SIGMOID, //f-measure (binary, single sigmoid output)
                    Activation.SOFTMAX //f-measure (binary, 2-label softmax output)
    };

    int[] nOut = new int[] {1, //xent
                    3, //xent
                    5, //cosine
                    3, //hinge
                    3, //kld
                    3, //kld + softmax
                    3, //l1
                    3, //l1 + softmax
                    3, //l2
                    3, //l2 + softmax
                    3, //mae
                    3, //mae + softmax
                    3, //mape
                    3, //mape + softmax
                    3, //mcxent
                    3, //mse
                    3, //mse + softmax
                    3, //msle
                    3, //msle + softmax
                    3, //nll
                    3, //nll + softmax
                    3, //poisson
                    3, //squared hinge
                    1, //f-measure (binary, single sigmoid output)
                    2, //f-measure (binary, 2-label softmax output)
    };

    for (int i = 0; i < lossFunctions.length; i++) {

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(Updater.ADAM).list()
                        .layer(0, new DenseLayer.Builder().nIn(4).nOut(nOut[i]).activation(Activation.TANH).build())
                        .layer(1, new LossLayer.Builder().lossFunction(lossFunctions[i])
                                        .activation(outputActivationFn[i]).build())
                        .validateOutputLayerConfig(false).build();

        String json = conf.toJson();
        String yaml = conf.toYaml();

        MultiLayerConfiguration fromJson = MultiLayerConfiguration.fromJson(json);
        MultiLayerConfiguration fromYaml = MultiLayerConfiguration.fromYaml(yaml);

        assertEquals(conf, fromJson);
        assertEquals(conf, fromYaml);
    }
}
 
Example 9
Source File: TestSameDiffOutput.java    From deeplearning4j with Apache License 2.0
@Test
public void testMSEOutputLayer(){       //Failing 2019/04/17 - https://github.com/deeplearning4j/deeplearning4j/issues/7560
    Nd4j.getRandom().setSeed(12345);

    for(Activation a : new Activation[]{Activation.IDENTITY, Activation.TANH, Activation.SOFTMAX}) {
        log.info("Starting test: " + a);

        MultiLayerConfiguration confSD = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .updater(new Adam(0.01))
                .list()
                .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build())
                .layer(new SameDiffMSEOutputLayer(5, 5, a, WeightInit.XAVIER))
                .build();

        MultiLayerConfiguration confStd = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .updater(new Adam(0.01))
                .list()
                .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build())
                .layer(new OutputLayer.Builder().nIn(5).nOut(5).activation(a).lossFunction(LossFunctions.LossFunction.MSE).build())
                .build();

        MultiLayerNetwork netSD = new MultiLayerNetwork(confSD);
        netSD.init();

        MultiLayerNetwork netStd = new MultiLayerNetwork(confStd);
        netStd.init();

        netSD.params().assign(netStd.params());

        assertEquals(netStd.paramTable(), netSD.paramTable());

        int minibatch = 2;
        INDArray in = Nd4j.rand(minibatch, 5);
        INDArray label = Nd4j.rand(minibatch, 5);

        INDArray outSD = netSD.output(in);
        INDArray outStd = netStd.output(in);
        assertEquals(outStd, outSD);

        DataSet ds = new DataSet(in, label);
        double scoreSD = netSD.score(ds);
        double scoreStd = netStd.score(ds);
        assertEquals(scoreStd, scoreSD, 1e-6);

        netSD.setInput(in);
        netSD.setLabels(label);

        netStd.setInput(in);
        netStd.setLabels(label);

        //System.out.println(((SameDiffOutputLayer) netSD.getLayer(1)).sameDiff.summary());

        netSD.computeGradientAndScore();
        netStd.computeGradientAndScore();

        assertEquals(netStd.getFlattenedGradients(), netSD.getFlattenedGradients());

        for (int i = 0; i < 3; i++) {
            netSD.fit(ds);
            netStd.fit(ds);
            String s = String.valueOf(i);
            assertEquals(s, netStd.params(), netSD.params());
            assertEquals(s, netStd.getFlattenedGradients(), netSD.getFlattenedGradients());
        }

        //Test fit before output:
        MultiLayerNetwork net = new MultiLayerNetwork(confSD.clone());
        net.init();
        net.fit(ds);

        //Sanity check on different minibatch sizes:
        INDArray newIn = Nd4j.vstack(in, in);
        INDArray outMbsd = netSD.output(newIn);
        INDArray outMb = netStd.output(newIn);
        assertEquals(outMb, outMbsd);
    }
}
 
Example 10
Source File: VaeGradientCheckTests.java    From deeplearning4j with Apache License 2.0
@Test
    public void testVaeAsMLP() {
        //Post pre-training: a VAE can be used as an MLP by taking the mean value from p(z|x) as the output
        //This gradient check tests this part

        Activation[] activFns = {Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH};

        LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MCXENT, LossFunction.MSE, LossFunction.MSE, LossFunction.MCXENT, LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.SOFTMAX, Activation.TANH, Activation.TANH, Activation.SOFTMAX, Activation.TANH};

        //use l2vals[i] with l1vals[i]
        double[] l2vals = {0.4, 0.0, 0.4, 0.4, 0.0, 0.0};
        double[] l1vals = {0.0, 0.0, 0.5, 0.0, 0.0, 0.5};
        double[] biasL2 = {0.0, 0.0, 0.0, 0.2, 0.0, 0.4};
        double[] biasL1 = {0.0, 0.0, 0.6, 0.0, 0.0, 0.0};

        int[][] encoderLayerSizes = new int[][] {{5}, {5}, {5, 6}, {5, 6}, {5}, {5, 6}};
        int[][] decoderLayerSizes = new int[][] {{6}, {7, 8}, {6}, {7, 8}, {6}, {7, 8}};

        int[] minibatches = new int[]{1,5,4,3,1,4};

        Nd4j.getRandom().setSeed(12345);
        for( int i=0; i<activFns.length; i++ ){
            LossFunction lf = lossFunctions[i];
            Activation outputActivation = outputActivations[i];
            double l2 = l2vals[i];
            double l1 = l1vals[i];
            int[] encoderSizes = encoderLayerSizes[i];
            int[] decoderSizes = decoderLayerSizes[i];
            int minibatch = minibatches[i];
            INDArray input = Nd4j.rand(minibatch, 4);
            INDArray labels = Nd4j.create(minibatch, 3);
            for (int j = 0; j < minibatch; j++) {
                labels.putScalar(j, j % 3, 1.0);
            }
            Activation afn = activFns[i];

            MultiLayerConfiguration conf =
                    new NeuralNetConfiguration.Builder().l2(l2).l1(l1)
                            .dataType(DataType.DOUBLE)
                            .updater(new NoOp())
                            .l2Bias(biasL2[i]).l1Bias(biasL1[i])
                            .updater(new NoOp()).seed(12345L).list()
                            .layer(0, new VariationalAutoencoder.Builder().nIn(4)
                                    .nOut(3).encoderLayerSizes(encoderSizes)
                                    .decoderLayerSizes(decoderSizes)

                                    .dist(new NormalDistribution(0, 1))
                                    .activation(afn)
                                    .build())
                            .layer(1, new OutputLayer.Builder(lf)
                                    .activation(outputActivation).nIn(3).nOut(3)

                                    .dist(new NormalDistribution(0, 1))
                                    .build())
                            .build();

            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
            mln.init();

            String msg = "testVaeAsMLP() - activationFn=" + afn + ", lossFn=" + lf
                    + ", outputActivation=" + outputActivation + ", encLayerSizes = "
                    + Arrays.toString(encoderSizes) + ", decLayerSizes = "
                    + Arrays.toString(decoderSizes) + ", l2=" + l2 + ", l1=" + l1;
            if (PRINT_RESULTS) {
                System.out.println(msg);
//                for (int j = 0; j < mln.getnLayers(); j++)
//                    System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
            }

            boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input,
                    labels);
            assertTrue(msg, gradOK);
            TestUtils.testModelSerialization(mln);
        }
    }
 
Example 11
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientCNNL1L2MLN() {
        if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format...
            return;

        //Parameterized test, testing combinations of:
        // (a) activation function
        // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
        // (c) Loss function (with specified output activations)

        DataSet ds = new IrisDataSetIterator(150, 150).next();
        ds.normalizeZeroMeanZeroUnitVariance();
        INDArray input = ds.getFeatures();
        INDArray labels = ds.getLabels();

        //use l2vals[i] with l1vals[i]
        double[] l2vals = {0.4, 0.0, 0.4, 0.4};
        double[] l1vals = {0.0, 0.0, 0.5, 0.0};
        double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
        double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
        Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS};
        boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first

        LossFunctions.LossFunction[] lossFunctions =
                {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here

        for( int i=0; i<l2vals.length; i++ ){
            Activation afn = activFns[i];
            boolean doLearningFirst = characteristic[i];
            LossFunctions.LossFunction lf = lossFunctions[i];
            Activation outputActivation = outputActivations[i];
            double l2 = l2vals[i];
            double l1 = l1vals[i];

            MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                    .dataType(DataType.DOUBLE)
                    .l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i])
                    .optimizationAlgo(
                            OptimizationAlgorithm.CONJUGATE_GRADIENT)
                    .seed(12345L).list()
                    .layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6)
                            .weightInit(WeightInit.XAVIER).activation(afn)
                            .updater(new NoOp()).build())
                    .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
                            .weightInit(WeightInit.XAVIER).updater(new NoOp()).build())

                    .setInputType(InputType.convolutionalFlat(1, 4, 1));

            MultiLayerConfiguration conf = builder.build();

            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
            mln.init();
            String testName = new Object() {}.getClass().getEnclosingMethod().getName();

            if (doLearningFirst) {
                //Run a number of iterations of learning
                mln.setInput(ds.getFeatures());
                mln.setLabels(ds.getLabels());
                mln.computeGradientAndScore();
                double scoreBefore = mln.score();
                for (int j = 0; j < 10; j++)
                    mln.fit(ds);
                mln.computeGradientAndScore();
                double scoreAfter = mln.score();
                //Can't test in 'characteristic mode of operation' if not learning
                String msg = testName
                        + "- score did not (sufficiently) decrease during learning - activationFn="
                        + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                        + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
                        + ", scoreAfter=" + scoreAfter + ")";
                assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
            }

            if (PRINT_RESULTS) {
                System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf
                        + ", outputActivation=" + outputActivation + ", doLearningFirst="
                        + doLearningFirst);
//                for (int j = 0; j < mln.getnLayers(); j++)
//                    System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
            }

            boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

            assertTrue(gradOK);
            TestUtils.testModelSerialization(mln);
        }
    }
 
Example 12
Source File: Vasttext.java    From scava with Eclipse Public License 2.0
private ComputationGraph VasttextTextualAndNumeric()
{
	Activation activation = null;
	LossFunction loss = null;
	//For multilabel classification, we follow the recommendation in the book "Deep Learning with Python" and use the following parameters
	if(multiLabel)
	{
		activation = Activation.SIGMOID;
		loss = LossFunction.XENT; //Binary Crossentropy
	}
	else
	{
	//We use softmax/categorical cross entropy for this binary classification because the output layer has two neurons. With a single output neuron,
	//the activation would instead be sigmoid and the loss binary cross entropy
		activation = Activation.SOFTMAX;
		loss = LossFunction.MCXENT;	//CATEGORICAL_CROSSENTROPY
	}

	System.err.println("LR:"+lr);
	
	System.err.println("Dense:"+denseDimension);

	ComputationGraphConfiguration  nnConf = new NeuralNetConfiguration.Builder()
			.updater(new Adam(lr))
			.weightInit(WeightInit.XAVIER)
			.trainingWorkspaceMode(WorkspaceMode.ENABLED)
               .inferenceWorkspaceMode(WorkspaceMode.ENABLED)
			.graphBuilder()
			.addInputs("Text", "Extra")
			//Embeddings Parts
			.addLayer("Embeddings", new EmbeddingSequenceLayer.Builder()
                       .nIn(textFeaturesSize)
                       .nOut(denseDimension)
                       .activation(Activation.IDENTITY)
                       //.activation(Activation.TANH)
                       //.dropOut(0.0)
                       .build(), "Text")
			.addLayer("GlobalPooling", new GlobalPoolingLayer.Builder()
                       .poolingType(PoolingType.AVG)
                       .poolingDimensions(2)
                       .collapseDimensions(true)
                       //.dropOut(0.0)
                       .build(), "Embeddings")
			//We're merging directly the values from the extra
			.addVertex("Merge", new MergeVertex(), "GlobalPooling","Extra")
			.addLayer("DenseAll", new DenseLayer.Builder()
					.nIn(denseDimension+numericFeaturesSize)
					.nOut(denseDimension/2)
					//.dropOut(0.5)
					//.l2(0.001)
					.build(), "Merge")
			.addLayer("Output", new OutputLayer.Builder()
					//.dropOut(0.5)
					.nIn(denseDimension/2)
                       .nOut(labelsSize)
                       .activation(activation)
                       .lossFunction(loss)
                       .build(), "DenseAll")
			.setOutputs("Output")
			.pretrain(false)
			.backprop(true)
			.build();

	return new ComputationGraph(nnConf);
}
 
Example 13
Source File: BNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientBNWithCNNandSubsamplingCompGraph() {
        //Parameterized test, testing combinations of:
        // (a) activation function
        // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
        // (c) Loss function (with specified output activations)
        // (d) l1 and l2 values
        Activation[] activFns = {Activation.TANH, Activation.IDENTITY};
        boolean doLearningFirst = true;

        LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD};
        Activation[] outputActivations = {Activation.SOFTMAX}; //i.e., lossFunctions[i] used with outputActivations[i] here

        double[] l2vals = {0.0, 0.1};
        double[] l1vals = {0.0, 0.2}; //i.e., use l2vals[j] with l1vals[j]

        Nd4j.getRandom().setSeed(12345);
        int minibatch = 10;
        int depth = 2;
        int hw = 5;
        int nOut = 3;
        INDArray input = Nd4j.rand(new int[]{minibatch, depth, hw, hw});
        INDArray labels = Nd4j.zeros(minibatch, nOut);
        Random r = new Random(12345);
        for (int i = 0; i < minibatch; i++) {
            labels.putScalar(i, r.nextInt(nOut), 1.0);
        }

        DataSet ds = new DataSet(input, labels);

        for (boolean useLogStd : new boolean[]{true, false}) {
            for (Activation afn : activFns) {
                for (int i = 0; i < lossFunctions.length; i++) {
                    for (int j = 0; j < l2vals.length; j++) {
                        LossFunctions.LossFunction lf = lossFunctions[i];
                        Activation outputActivation = outputActivations[i];

                        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
                                .dataType(DataType.DOUBLE)
                                .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
                                .updater(new NoOp())
                                .dist(new UniformDistribution(-2, 2)).seed(12345L).graphBuilder()
                                .addInputs("in")
                                .addLayer("0", new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3)
                                        .activation(afn).build(), "in")
                                .addLayer("1", new BatchNormalization.Builder().useLogStd(useLogStd).build(), "0")
                                .addLayer("2", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                                        .kernelSize(2, 2).stride(1, 1).build(), "1")
                                .addLayer("3", new BatchNormalization.Builder().useLogStd(useLogStd).build(), "2")
                                .addLayer("4", new ActivationLayer.Builder().activation(afn).build(), "3")
                                .addLayer("5", new OutputLayer.Builder(lf).activation(outputActivation)
                                        .nOut(nOut).build(), "4")
                                .setOutputs("5").setInputTypes(InputType.convolutional(hw, hw, depth))
                                .build();

                        ComputationGraph net = new ComputationGraph(conf);
                        net.init();
                        String name = new Object() {}.getClass().getEnclosingMethod().getName();

                        if (doLearningFirst) {
                            //Run a number of iterations of learning
                            net.setInput(0, ds.getFeatures());
                            net.setLabels(ds.getLabels());
                            net.computeGradientAndScore();
                            double scoreBefore = net.score();
                            for (int k = 0; k < 20; k++)
                                net.fit(ds);
                            net.computeGradientAndScore();
                            double scoreAfter = net.score();
                            //Can't test in 'characteristic mode of operation' if not learning
                            String msg = name
                                    + " - score did not (sufficiently) decrease during learning - activationFn="
                                    + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                    + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
                                    + ", scoreAfter=" + scoreAfter + ")";
                            assertTrue(msg, scoreAfter < 0.9 * scoreBefore);
                        }

                        System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf
                                + ", outputActivation=" + outputActivation + ", doLearningFirst="
                                + doLearningFirst + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]);
//                        for (int k = 0; k < net.getNumLayers(); k++)
//                            System.out.println("Layer " + k + " # params: " + net.getLayer(k).numParams());

                        //Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc
                        //i.e., runningMean = decay * runningMean + (1-decay) * batchMean
                        //However, numerical gradient will be 0 as forward pass doesn't depend on this "parameter"
                        Set<String> excludeParams = new HashSet<>(Arrays.asList("1_mean", "1_var", "3_mean", "3_var", "1_log10stdev", "3_log10stdev"));
                        boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(net).inputs(new INDArray[]{input})
                                .labels(new INDArray[]{labels}).excludeParams(excludeParams));

                        assertTrue(gradOK);
                        TestUtils.testModelSerialization(net);
                    }
                }
            }
        }
    }
 
Example 14
Source File: LSTMGradientCheckTests.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientGravesBidirectionalLSTMFull() {
        Activation[] activFns = {Activation.TANH, Activation.SOFTSIGN};

        LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here

        int timeSeriesLength = 3;
        int nIn = 2;
        int layerSize = 2;
        int nOut = 2;
        int miniBatchSize = 3;

        Random r = new Random(12345L);
        INDArray input = Nd4j.rand(DataType.DOUBLE, miniBatchSize, nIn, timeSeriesLength).subi(0.5);

        INDArray labels = TestUtils.randomOneHotTimeSeries(miniBatchSize, nOut, timeSeriesLength);

        //use l2vals[i] with l1vals[i]
        double[] l2vals = {0.4, 0.0};
        double[] l1vals = {0.5, 0.0};
        double[] biasL2 = {0.0, 0.2};
        double[] biasL1 = {0.0, 0.6};

        for (int i = 0; i < lossFunctions.length; i++) {
            for (int k = 0; k < l2vals.length; k++) {
                Activation afn = activFns[i];
                LossFunction lf = lossFunctions[i];
                Activation outputActivation = outputActivations[i];
                double l2 = l2vals[k];
                double l1 = l1vals[k];

                NeuralNetConfiguration.Builder conf =
                        new NeuralNetConfiguration.Builder();
                if (l1 > 0.0)
                    conf.l1(l1);
                if (l2 > 0.0)
                    conf.l2(l2);
                if (biasL2[k] > 0)
                    conf.l2Bias(biasL2[k]);
                if (biasL1[k] > 0)
                    conf.l1Bias(biasL1[k]);

                MultiLayerConfiguration mlc = conf.seed(12345L)
                        .dataType(DataType.DOUBLE)
                        .updater(new NoOp())
                        .list().layer(0,
                                new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize)
                                        .weightInit(new NormalDistribution(0, 1))
                                        .activation(afn)
                                        .build())
                        .layer(1, new RnnOutputLayer.Builder(lf).activation(outputActivation).nIn(layerSize)
                                .nOut(nOut)
                                .dist(new NormalDistribution(0, 1)).updater(new NoOp()).build())
                        .build();


                MultiLayerNetwork mln = new MultiLayerNetwork(mlc);

                mln.init();

                if (PRINT_RESULTS) {
                    System.out.println("testGradientGravesBidirectionalLSTMFull() - activationFn=" + afn
                            + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2
                            + ", l1=" + l1);
//                    for (int j = 0; j < mln.getnLayers(); j++)
//                        System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                }

                boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                String msg = "testGradientGravesLSTMFull() - activationFn=" + afn + ", lossFn=" + lf
                        + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1;
                assertTrue(msg, gradOK);
                TestUtils.testModelSerialization(mln);
            }
        }
    }
 
Example 15
Source File: LSTMGradientCheckTests.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientLSTMFull() {

        int timeSeriesLength = 4;
        int nIn = 3;
        int layerSize = 4;
        int nOut = 2;
        int miniBatchSize = 2;

        boolean[] gravesLSTM = new boolean[] {true, false};

        for (boolean graves : gravesLSTM) {

            Random r = new Random(12345L);
            INDArray input = Nd4j.rand(new int[]{miniBatchSize, nIn, timeSeriesLength}, 'f').subi(0.5);

            INDArray labels = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength);
            for (int i = 0; i < miniBatchSize; i++) {
                for (int j = 0; j < timeSeriesLength; j++) {
                    int idx = r.nextInt(nOut);
                    labels.putScalar(new int[] {i, idx, j}, 1.0f);
                }
            }


            //use l2vals[i] with l1vals[i]
            double[] l2vals = {0.4, 0.0};
            double[] l1vals = {0.0, 0.5};
            double[] biasL2 = {0.3, 0.0};
            double[] biasL1 = {0.0, 0.6};
            Activation[] activFns = {Activation.TANH, Activation.SOFTSIGN};
            LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
            Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH};

            for (int i = 0; i < l2vals.length; i++) {

                LossFunction lf = lossFunctions[i];
                Activation outputActivation = outputActivations[i];
                double l2 = l2vals[i];
                double l1 = l1vals[i];
                Activation afn = activFns[i];

                NeuralNetConfiguration.Builder conf =
                        new NeuralNetConfiguration.Builder()
                                .dataType(DataType.DOUBLE)
                                .seed(12345L)
                                .dist(new NormalDistribution(0, 1)).updater(new NoOp());

                if (l1 > 0.0)
                    conf.l1(l1);
                if (l2 > 0.0)
                    conf.l2(l2);
                if (biasL2[i] > 0)
                    conf.l2Bias(biasL2[i]);
                if (biasL1[i] > 0)
                    conf.l1Bias(biasL1[i]);

                Layer layer;
                if (graves) {
                    layer = new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(afn).build();
                } else {
                    layer = new LSTM.Builder().nIn(nIn).nOut(layerSize).activation(afn).build();
                }

                NeuralNetConfiguration.ListBuilder conf2 = conf.list().layer(0, layer)
                        .layer(1, new RnnOutputLayer.Builder(lf).activation(outputActivation)
                                .nIn(layerSize).nOut(nOut).build())
                        ;

                MultiLayerNetwork mln = new MultiLayerNetwork(conf2.build());
                mln.init();

                String testName = "testGradientLSTMFull(" + (graves ? "GravesLSTM" : "LSTM")
                        + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation="
                        + outputActivation + ", l2=" + l2 + ", l1=" + l1;
                if (PRINT_RESULTS) {
                    System.out.println(testName);
//                    for (int j = 0; j < mln.getnLayers(); j++)
//                        System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                }

                boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(mln).input(input)
                        .labels(labels).subset(true).maxPerParam(128));

                assertTrue(testName, gradOK);
                TestUtils.testModelSerialization(mln);
            }
        }
    }
 
Example 16
Source File: GradientCheckTestsMasking.java    From deeplearning4j with Apache License 2.0
@Test
public void testPerOutputMaskingMLP() {
    int nIn = 6;
    int layerSize = 4;

    INDArray mask1 = Nd4j.create(new double[] {1, 0, 0, 1, 0}).reshape(1, -1);
    INDArray mask3 = Nd4j.create(new double[][] {{1, 1, 1, 1, 1}, {0, 1, 0, 1, 0}, {1, 0, 0, 1, 1}});
    INDArray[] labelMasks = new INDArray[] {mask1, mask3};

    ILossFunction[] lossFunctions = new ILossFunction[] {new LossBinaryXENT(),
                    //                new LossCosineProximity(),    //Doesn't support per-output masking, as it doesn't make sense for cosine proximity
                    new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL2(), new LossMAE(),
                    new LossMAE(), new LossMAPE(), new LossMAPE(),
                    //                new LossMCXENT(),             //Per output masking on MCXENT+Softmax: not yet supported
                    new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(),
                    new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge()};

    Activation[] act = new Activation[] {Activation.SIGMOID, //XENT
                    //                Activation.TANH,
                    Activation.TANH, //Hinge
                    Activation.SIGMOID, //KLD
                    Activation.SOFTMAX, //KLD + softmax
                    Activation.TANH, //L1
                    Activation.TANH, //L2
                    Activation.TANH, //MAE
                    Activation.SOFTMAX, //MAE + softmax
                    Activation.TANH, //MAPE
                    Activation.SOFTMAX, //MAPE + softmax
                    //                Activation.SOFTMAX, //MCXENT + softmax: see comment above
                    Activation.SIGMOID, //MCXENT + sigmoid
                    Activation.TANH, //MSE
                    Activation.SOFTMAX, //MSE + softmax
                    Activation.SIGMOID, //MSLE - needs positive labels/activations (due to log)
                    Activation.SOFTMAX, //MSLE + softmax
                    Activation.SIGMOID, //NLL
                    Activation.SIGMOID, //Poisson
                    Activation.TANH //Squared hinge
    };

    for (INDArray labelMask : labelMasks) {

        val minibatch = labelMask.size(0);
        val nOut = labelMask.size(1);

        for (int i = 0; i < lossFunctions.length; i++) {
            ILossFunction lf = lossFunctions[i];
            Activation a = act[i];


            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp())
                            .dataType(DataType.DOUBLE)
                            .dist(new NormalDistribution(0, 1)).seed(12345)
                            .list()
                            .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH)
                                            .build())
                            .layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf)
                                            .activation(a).build())
                            .validateOutputLayerConfig(false)
                            .build();

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            INDArray[] fl = LossFunctionGradientCheck.getFeaturesAndLabels(lf, minibatch, nIn, nOut, 12345);
            INDArray features = fl[0];
            INDArray labels = fl[1];

            String msg = "testPerOutputMaskingMLP(): maskShape = " + Arrays.toString(labelMask.shape())
                            + ", loss function = " + lf + ", activation = " + a;

            System.out.println(msg);

            boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(features)
                    .labels(labels).labelMask(labelMask));

            assertTrue(msg, gradOK);
            TestUtils.testModelSerialization(net);
        }
    }
}
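
The per-output label mask above is only passed to the gradient checker. As a rough usage sketch, the same kind of [minibatch, nOut] mask can also be supplied when fitting a network, so that entries with 0.0 in the mask contribute nothing to the loss or gradients; the layer sizes, updater, label values and mask values below are assumptions for illustration, not taken from the test:

// Illustrative sketch only; not part of the deeplearning4j test sources.
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.lossfunctions.impl.LossBinaryXENT;

public class PerOutputMaskSketch {
    public static void main(String[] args) {
        int nIn = 6, layerSize = 4, nOut = 5;

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .updater(new Adam(1e-3))    // hypothetical updater and learning rate
                .list()
                .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize)
                        .activation(Activation.TANH).build())
                .layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut)
                        .lossFunction(new LossBinaryXENT())
                        .activation(Activation.SIGMOID).build())
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        INDArray features = Nd4j.rand(DataType.DOUBLE, 3, nIn);
        INDArray labels = Nd4j.create(new double[][] {
                {1, 0, 1, 0, 1},
                {0, 1, 0, 1, 0},
                {1, 1, 0, 0, 1}});
        // 1.0 keeps a label entry in the loss; 0.0 excludes that output for that example
        INDArray labelMask = Nd4j.create(new double[][] {
                {1, 1, 1, 1, 1},
                {0, 1, 0, 1, 0},
                {1, 0, 0, 1, 1}});

        net.fit(features, labels, null, labelMask); // null: no feature mask
    }
}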
 
Example 17
Source File: GradientCheckTestsMasking.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
public void gradientCheckMaskingOutputSimple() {

    int timeSeriesLength = 5;
    boolean[][] mask = new boolean[5][];
    mask[0] = new boolean[] {true, true, true, true, true}; //No masking
    mask[1] = new boolean[] {false, true, true, true, true}; //mask first output time step
    mask[2] = new boolean[] {false, false, false, false, true}; //time series classification: mask all but last
    mask[3] = new boolean[] {false, false, true, false, true}; //time series classification w/ variable length TS
    mask[4] = new boolean[] {true, true, true, false, true}; //variable length TS

    int nIn = 3;
    int layerSize = 3;

    GradientCheckSimpleScenario[] scenarios = new GradientCheckSimpleScenario[] {
                    new GradientCheckSimpleScenario(LossFunctions.LossFunction.MCXENT.getILossFunction(),
                                    Activation.SOFTMAX, 2, 2),
                    new GradientCheckSimpleScenario(LossMixtureDensity.builder().gaussians(2).labelWidth(3).build(),
                                    Activation.TANH, 10, 3),
                    new GradientCheckSimpleScenario(LossMixtureDensity.builder().gaussians(2).labelWidth(4).build(),
                                    Activation.IDENTITY, 12, 4)};

    for (GradientCheckSimpleScenario s : scenarios) {

        Random r = new Random(12345L);
        INDArray input = Nd4j.rand(DataType.DOUBLE, 1, nIn, timeSeriesLength).subi(0.5);

        INDArray labels = Nd4j.zeros(DataType.DOUBLE, 1, s.labelWidth, timeSeriesLength);
        for (int m = 0; m < 1; m++) {
            for (int j = 0; j < timeSeriesLength; j++) {
                int idx = r.nextInt(s.labelWidth);
                labels.putScalar(new int[] {m, idx, j}, 1.0f);
            }
        }

        for (int i = 0; i < mask.length; i++) {

            //Create mask array:
            INDArray maskArr = Nd4j.create(1, timeSeriesLength);
            for (int j = 0; j < mask[i].length; j++) {
                maskArr.putScalar(new int[] {0, j}, mask[i][j] ? 1.0 : 0.0);
            }

            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L)
                    .dataType(DataType.DOUBLE)
                    .updater(new NoOp())
                    .list()
                    .layer(0, new SimpleRnn.Builder().nIn(nIn).nOut(layerSize)
                            .weightInit(new NormalDistribution(0, 1)).build())
                    .layer(1, new RnnOutputLayer.Builder(s.lf).activation(s.act).nIn(layerSize).nOut(s.nOut)
                            .weightInit(new NormalDistribution(0, 1)).build())
                    .build();
            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
            mln.init();

            boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(mln).input(input)
                    .labels(labels).labelMask(maskArr));

            String msg = "gradientCheckMaskingOutputSimple() - timeSeriesLength=" + timeSeriesLength
                            + ", miniBatchSize=" + 1;
            assertTrue(msg, gradOK);
            TestUtils.testModelSerialization(mln);
        }
    }
}
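
The mask {false, false, false, false, true} above corresponds to time-series classification, where only the final output step carries a label. A minimal sketch of that case outside the gradient check, assuming an LSTM + softmax RnnOutputLayer stack and hypothetical sizes, updater and class indices:

// Illustrative sketch only; not part of the deeplearning4j test sources.
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.LSTM;
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class LastStepMaskSketch {
    public static void main(String[] args) {
        int miniBatchSize = 2, nIn = 3, layerSize = 3, nOut = 4, timeSeriesLength = 5;

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .updater(new Adam(1e-3))    // hypothetical updater and learning rate
                .list()
                .layer(0, new LSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build())
                .layer(1, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut).build())
                .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        INDArray features = Nd4j.rand(DataType.DOUBLE, miniBatchSize, nIn, timeSeriesLength);
        INDArray labels = Nd4j.zeros(DataType.DOUBLE, miniBatchSize, nOut, timeSeriesLength);
        labels.putScalar(new int[] {0, 1, timeSeriesLength - 1}, 1.0); // one-hot class at the final step
        labels.putScalar(new int[] {1, 3, timeSeriesLength - 1}, 1.0);

        // Mask shape [miniBatchSize, timeSeriesLength]: only the last output step contributes to the loss
        INDArray labelMask = Nd4j.zeros(DataType.DOUBLE, miniBatchSize, timeSeriesLength);
        labelMask.putScalar(new int[] {0, timeSeriesLength - 1}, 1.0);
        labelMask.putScalar(new int[] {1, timeSeriesLength - 1}, 1.0);

        net.fit(features, labels, null, labelMask); // null: no feature mask
    }
}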
 
Example 18
Source File: GradientCheckTests.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
public void testGradientWeightDecay() {

    Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.THRESHOLDEDRELU};
    boolean[] characteristic = {false, true}; //If true: run some backprop steps first

    LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
    Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here

    DataNormalization scaler = new NormalizerMinMaxScaler();
    DataSetIterator iter = new IrisDataSetIterator(150, 150);
    scaler.fit(iter);
    iter.setPreProcessor(scaler);
    DataSet ds = iter.next();

    INDArray input = ds.getFeatures();
    INDArray labels = ds.getLabels();

    //use l2vals[i] with l1vals[i]
    double[] l2vals = {0.4, 0.0, 0.4, 0.4, 0.0, 0.0};
    double[] l1vals = {0.0, 0.0, 0.5, 0.0, 0.5, 0.0};
    double[] biasL2 = {0.0, 0.0, 0.0, 0.2, 0.0, 0.0};
    double[] biasL1 = {0.0, 0.0, 0.6, 0.0, 0.0, 0.5};
    double[] wdVals = {0.0, 0.0, 0.0, 0.0, 0.4, 0.0};
    double[] wdBias = {0.0, 0.0, 0.0, 0.0, 0.0, 0.4};

    for (Activation afn : activFns) {
        for (int i = 0; i < lossFunctions.length; i++) {
            for (int k = 0; k < l2vals.length; k++) {
                LossFunction lf = lossFunctions[i];
                Activation outputActivation = outputActivations[i];
                double l2 = l2vals[k];
                double l1 = l1vals[k];

                MultiLayerConfiguration conf =
                        new NeuralNetConfiguration.Builder().l2(l2).l1(l1)
                                .dataType(DataType.DOUBLE)
                                .l2Bias(biasL2[k]).l1Bias(biasL1[k])
                                .weightDecay(wdVals[k]).weightDecayBias(wdBias[k])
                                .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                                .seed(12345L)
                                .list().layer(0,
                                new DenseLayer.Builder().nIn(4).nOut(3)
                                        .dist(new NormalDistribution(0,
                                                1))
                                        .updater(new NoOp())
                                        .activation(afn).build())
                                .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3)
                                        .dist(new NormalDistribution(0, 1))
                                        .updater(new NoOp())
                                        .activation(outputActivation).build())
                                .build();

                MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                mln.init();

                boolean gradOK1 = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                String msg = "testGradientWeightDecay() - activationFn=" + afn + ", lossFn=" + lf
                        + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1;
                assertTrue(msg, gradOK1);

                TestUtils.testModelSerialization(mln);
            }
        }
    }
}
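
The test above mixes l1/l2 penalties with the separate weightDecay and weightDecayBias settings. As a rough standalone sketch of the weight-decay path with a softmax output layer on Iris (the updater, learning rate, decay coefficient and epoch count below are assumptions for illustration, not taken from the test):

// Illustrative sketch only; not part of the deeplearning4j test sources.
import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler;
import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;

public class WeightDecaySketch {
    public static void main(String[] args) {
        DataSet ds = new IrisDataSetIterator(150, 150).next();
        NormalizerMinMaxScaler scaler = new NormalizerMinMaxScaler();
        scaler.fit(ds);
        scaler.transform(ds);

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .updater(new Sgd(0.1))      // hypothetical updater and learning rate
                .weightDecay(1e-4)          // decay on the weights...
                .weightDecayBias(0.0)       // ...but not on the biases
                .list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.TANH).build())
                .layer(1, new OutputLayer.Builder(LossFunction.MCXENT)
                        .nIn(3).nOut(3).activation(Activation.SOFTMAX).build())
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        for (int epoch = 0; epoch < 50; epoch++) {
            net.fit(ds);
        }
        System.out.println("Score after training: " + net.score());
    }
}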
 
Example 19
Source File: GradientCheckTests.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
    public void testAutoEncoder() {
        //As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied
        //Need to run gradient through updater, so that L2 can be applied

        Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
        boolean[] characteristic = {false, true}; //If true: run some backprop steps first

        LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH};

        DataNormalization scaler = new NormalizerMinMaxScaler();
        DataSetIterator iter = new IrisDataSetIterator(150, 150);
        scaler.fit(iter);
        iter.setPreProcessor(scaler);
        DataSet ds = iter.next();
        INDArray input = ds.getFeatures();
        INDArray labels = ds.getLabels();

        NormalizerStandardize norm = new NormalizerStandardize();
        norm.fit(ds);
        norm.transform(ds);

        double[] l2vals = {0.2, 0.0, 0.2};
        double[] l1vals = {0.0, 0.3, 0.3}; //i.e., use l2vals[i] with l1vals[i]

        for (Activation afn : activFns) {
            for (boolean doLearningFirst : characteristic) {
                for (int i = 0; i < lossFunctions.length; i++) {
                    for (int k = 0; k < l2vals.length; k++) {
                        LossFunction lf = lossFunctions[i];
                        Activation outputActivation = outputActivations[i];
                        double l2 = l2vals[k];
                        double l1 = l1vals[k];

                        Nd4j.getRandom().setSeed(12345);
                        MultiLayerConfiguration conf =
                                        new NeuralNetConfiguration.Builder()
                                                        .dataType(DataType.DOUBLE)
                                                        .updater(new NoOp())
                                                        .l2(l2).l1(l1)
                                                        .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                                                        .seed(12345L)
                                                        .dist(new NormalDistribution(0, 1))
                                                        .list().layer(0,
                                                                        new AutoEncoder.Builder().nIn(4).nOut(3)
                                                                                        .activation(afn).build())
                                                        .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3)
                                                                        .activation(outputActivation).build())
                                                        .build();

                        MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                        mln.init();

                        String msg;
                        if (doLearningFirst) {
                            //Run a number of iterations of learning
                            mln.setInput(ds.getFeatures());
                            mln.setLabels(ds.getLabels());
                            mln.computeGradientAndScore();
                            double scoreBefore = mln.score();
                            for (int j = 0; j < 10; j++)
                                mln.fit(ds);
                            mln.computeGradientAndScore();
                            double scoreAfter = mln.score();
                            //Can't test in 'characteristic mode of operation' if not learning
                            msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn="
                                            + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                            + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1
                                            + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
                            assertTrue(msg, scoreAfter < scoreBefore);
                        }

                        msg = "testGradMLP2LayerIrisSimple() - activationFn=" + afn + ", lossFn=" + lf
                                        + ", outputActivation=" + outputActivation + ", doLearningFirst="
                                        + doLearningFirst + ", l2=" + l2 + ", l1=" + l1;
                        if (PRINT_RESULTS) {
                            System.out.println(msg);
//                            for (int j = 0; j < mln.getnLayers(); j++)
//                                System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                        }

                        boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                        assertTrue(msg, gradOK);
                        TestUtils.testModelSerialization(mln);
                    }
                }
            }
        }
    }
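
As a rough follow-up sketch, the same AutoEncoder + softmax output stack can be trained and evaluated on the normalized Iris data outside the gradient check; the updater, learning rate, epoch count and use of the Evaluation class below are assumptions for illustration, not part of the test:

// Illustrative sketch only; not part of the deeplearning4j test sources.
import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.deeplearning4j.eval.Evaluation;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.AutoEncoder;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;

public class AutoEncoderSoftmaxSketch {
    public static void main(String[] args) {
        DataSet ds = new IrisDataSetIterator(150, 150).next();
        NormalizerStandardize norm = new NormalizerStandardize();
        norm.fit(ds);
        norm.transform(ds);

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .updater(new Adam(0.01))    // hypothetical updater and learning rate
                .list()
                .layer(0, new AutoEncoder.Builder().nIn(4).nOut(3).activation(Activation.TANH).build())
                .layer(1, new OutputLayer.Builder(LossFunction.MCXENT)
                        .nIn(3).nOut(3).activation(Activation.SOFTMAX).build())
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        for (int epoch = 0; epoch < 30; epoch++) {
            net.fit(ds);
        }

        // Softmax output: each row of 'probs' sums to 1 over the three Iris classes
        INDArray probs = net.output(ds.getFeatures());
        Evaluation eval = new Evaluation(3);
        eval.eval(ds.getLabels(), probs);
        System.out.println(eval.stats());
    }
}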
 
Example 20
Source File: Vasttext.java    From scava with Eclipse Public License 2.0 4 votes vote down vote up
private MultiLayerNetwork VasttextTextual()
{
	Activation activation = null;
	LossFunction loss = null;
	//For multi-label classification, the following parameters are used, as recommended in the book "Deep Learning with Python"
	if(multiLabel)
	{
		activation = Activation.SIGMOID;
		loss = LossFunction.XENT; //Binary Crossentropy
	}
	else
	{
		//We use softmax with categorical cross entropy for the binary classification because the output layer has two neurons.
		//With a single output neuron, the activation would instead be sigmoid and the loss binary cross entropy.
		activation = Activation.SOFTMAX;
		loss = LossFunction.MCXENT;	//CATEGORICAL_CROSSENTROPY
	}

	MultiLayerConfiguration nnConf = new NeuralNetConfiguration.Builder()
               .updater(new Adam(lr))
               .weightInit(WeightInit.XAVIER)
               .trainingWorkspaceMode(WorkspaceMode.ENABLED)
               .inferenceWorkspaceMode(WorkspaceMode.ENABLED)
               .list()
               .layer(0, new EmbeddingSequenceLayer.Builder()
                       .nIn(textFeaturesSize)
                       .nOut(denseDimension)
                       .activation(Activation.IDENTITY)
                       .build())
               .layer(1, new GlobalPoolingLayer.Builder()
                       .poolingType(PoolingType.AVG)
                       .poolingDimensions(2)
                       .collapseDimensions(true)
                       .build())
               .layer(2, new OutputLayer.Builder()
                       .nIn(denseDimension)
                       .nOut(labelsSize)
                       .activation(activation)
                       .lossFunction(loss)
                       .build())
               .pretrain(false).backprop(true).build();

       return new MultiLayerNetwork(nnConf);
}
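
A brief usage sketch for the softmax branch of this configuration. The Vasttext fields (lr, textFeaturesSize, denseDimension, labelsSize) are replaced by hypothetical stand-ins, and the pretrain/backprop calls are omitted here (they are the default behaviour in recent DL4J versions); the point is only that the network consumes sequences of token indices and returns per-label softmax probabilities:

// Illustrative sketch only; not part of the Vasttext class in the scava project.
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.EmbeddingSequenceLayer;
import org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.PoolingType;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;

public class VasttextSoftmaxSketch {
    public static void main(String[] args) {
        // Hypothetical stand-ins for the Vasttext fields
        int textFeaturesSize = 1000;   // vocabulary size
        int denseDimension = 16;       // embedding size
        int labelsSize = 2;            // two classes -> softmax + MCXENT branch
        double lr = 1e-3;

        MultiLayerConfiguration nnConf = new NeuralNetConfiguration.Builder()
                .updater(new Adam(lr))
                .weightInit(WeightInit.XAVIER)
                .list()
                .layer(0, new EmbeddingSequenceLayer.Builder()
                        .nIn(textFeaturesSize).nOut(denseDimension)
                        .activation(Activation.IDENTITY).build())
                .layer(1, new GlobalPoolingLayer.Builder()
                        .poolingType(PoolingType.AVG)
                        .poolingDimensions(2)
                        .collapseDimensions(true).build())
                .layer(2, new OutputLayer.Builder()
                        .nIn(denseDimension).nOut(labelsSize)
                        .activation(Activation.SOFTMAX)
                        .lossFunction(LossFunction.MCXENT).build())
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(nnConf);
        net.init();

        // One document of four token indices in [0, textFeaturesSize)
        INDArray tokens = Nd4j.create(new double[][] {{12, 7, 405, 3}});
        INDArray probabilities = net.output(tokens);  // shape [1, labelsSize]; each row sums to 1 (softmax)
        System.out.println(probabilities);
    }
}

For the multi-label branch, sigmoid activation with XENT loss would be swapped into layer 2, exactly as the if/else at the top of VasttextTextual() does.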