Java Code Examples for org.nd4j.linalg.lossfunctions.LossFunctions#LossFunction

The following examples show how to use org.nd4j.linalg.lossfunctions.LossFunctions#LossFunction. Each example notes the project and source file it was taken from.
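Before the individual examples, here is a minimal, hedged sketch (not taken from the project) of where the LossFunction enum usually appears: it is passed to an output layer builder when a network configuration is assembled. Layer sizes and the Adam updater below are illustrative placeholders.

// Minimal sketch (assumed sizes): an nIn=4 / nOut=3 classifier using MCXENT.
private static MultiLayerNetwork buildSimpleClassifier() {
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .updater(new Adam(1e-3))   // illustrative updater choice
            .list()
            .layer(new DenseLayer.Builder().nIn(4).nOut(10).activation(Activation.RELU).build())
            // The LossFunction enum selects the training loss of the output layer;
            // MCXENT (multi-class cross-entropy) is conventionally paired with SOFTMAX.
            .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX)
                    .nIn(10).nOut(3).build())
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    return net;
}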
Example 1
Source File: BackTrackLineSearchTest.java    From deeplearning4j with Apache License 2.0
private static OutputLayer getIrisLogisticLayerConfig(Activation activationFunction, int maxIterations,
                LossFunctions.LossFunction lossFunction) {
    NeuralNetConfiguration conf =
                    new NeuralNetConfiguration.Builder().seed(12345L).miniBatch(true)
                                    .maxNumLineSearchIterations(maxIterations)
                                    .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(lossFunction)
                                                    .nIn(4).nOut(3).activation(activationFunction)
                                                    .weightInit(WeightInit.XAVIER).build())
                                    .build();

    val numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    return (OutputLayer) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType());
}
 
Example 2
Source File: KerasLossUtils.java    From deeplearning4j with Apache License 2.0
/**
 * Map Keras to DL4J loss functions.
 *
 * @param kerasLoss String containing Keras loss function name
 * @param conf      Keras layer configuration, used to look up the Keras loss function field names
 * @return the corresponding DL4J ILossFunction
 * @throws UnsupportedKerasConfigurationException if the loss is unknown and no custom loss is registered
 */
public static ILossFunction mapLossFunction(String kerasLoss, KerasLayerConfiguration conf)
        throws UnsupportedKerasConfigurationException {
    LossFunctions.LossFunction dl4jLoss;
    if (kerasLoss.equals(conf.getKERAS_LOSS_MEAN_SQUARED_ERROR()) ||
            kerasLoss.equals(conf.getKERAS_LOSS_MSE())) {
        dl4jLoss = LossFunctions.LossFunction.SQUARED_LOSS;
    } else if (kerasLoss.equals(conf.getKERAS_LOSS_MEAN_ABSOLUTE_ERROR()) ||
            kerasLoss.equals(conf.getKERAS_LOSS_MAE())) {
        dl4jLoss = LossFunctions.LossFunction.MEAN_ABSOLUTE_ERROR;
    } else if (kerasLoss.equals(conf.getKERAS_LOSS_MEAN_ABSOLUTE_PERCENTAGE_ERROR()) ||
            kerasLoss.equals(conf.getKERAS_LOSS_MAPE())) {
        dl4jLoss = LossFunctions.LossFunction.MEAN_ABSOLUTE_PERCENTAGE_ERROR;
    } else if (kerasLoss.equals(conf.getKERAS_LOSS_MEAN_SQUARED_LOGARITHMIC_ERROR()) ||
            kerasLoss.equals(conf.getKERAS_LOSS_MSLE())) {
        dl4jLoss = LossFunctions.LossFunction.MEAN_SQUARED_LOGARITHMIC_ERROR;
    } else if (kerasLoss.equals(conf.getKERAS_LOSS_SQUARED_HINGE())) {
        dl4jLoss = LossFunctions.LossFunction.SQUARED_HINGE;
    } else if (kerasLoss.equals(conf.getKERAS_LOSS_HINGE())) {
        dl4jLoss = LossFunctions.LossFunction.HINGE;
    } else if (kerasLoss.equals(conf.getKERAS_LOSS_SPARSE_CATEGORICAL_CROSSENTROPY())) {
        dl4jLoss = LossFunctions.LossFunction.SPARSE_MCXENT;
    } else if (kerasLoss.equals(conf.getKERAS_LOSS_BINARY_CROSSENTROPY())) {
        dl4jLoss = LossFunctions.LossFunction.XENT;
    } else if (kerasLoss.equals(conf.getKERAS_LOSS_CATEGORICAL_CROSSENTROPY())) {
        dl4jLoss = LossFunctions.LossFunction.MCXENT;
    } else if (kerasLoss.equals(conf.getKERAS_LOSS_KULLBACK_LEIBLER_DIVERGENCE()) ||
            kerasLoss.equals(conf.getKERAS_LOSS_KLD())) {
        dl4jLoss = LossFunctions.LossFunction.KL_DIVERGENCE;
    } else if (kerasLoss.equals(conf.getKERAS_LOSS_POISSON())) {
        dl4jLoss = LossFunctions.LossFunction.POISSON;
    } else if (kerasLoss.equals(conf.getKERAS_LOSS_COSINE_PROXIMITY())) {
        dl4jLoss = LossFunctions.LossFunction.COSINE_PROXIMITY;
    } else {
        ILossFunction lossClass = customLoss.get(kerasLoss);
        if(lossClass != null){
            return lossClass;
        }else{
            throw new UnsupportedKerasConfigurationException("Unknown Keras loss function " + kerasLoss);
        }
    }
    return dl4jLoss.getILossFunction();
}
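A short usage sketch for this utility (an assumption, not taken from the project): conf would normally be supplied by the Keras import pipeline, e.g. a Keras2LayerConfiguration, and the loss name would come from the parsed model config.

// Hypothetical call: resolve a Keras loss name to its DL4J ILossFunction.
ILossFunction resolveLoss() throws UnsupportedKerasConfigurationException {
    KerasLayerConfiguration conf = new Keras2LayerConfiguration();  // assumed Keras 2.x field names
    // "categorical_crossentropy" is mapped to LossFunctions.LossFunction.MCXENT above.
    return KerasLossUtils.mapLossFunction("categorical_crossentropy", conf);
}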
 
Example 3
Source File: RnnLossLayer.java    From deeplearning4j with Apache License 2.0
/**
 * @param lossFunction Loss function for the loss layer
 */
public Builder(LossFunctions.LossFunction lossFunction) {
    lossFunction(lossFunction);
}
 
Example 4
Source File: LossLayer.java    From deeplearning4j with Apache License 2.0
/**
 * @param lossFunction Loss function for the loss layer
 */
public Builder(LossFunctions.LossFunction lossFunction) {
    lossFunction(lossFunction);
    this.activation(Activation.IDENTITY);
}
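As a hedged sketch of how these builders are typically used (sizes are illustrative): a LossLayer has no parameters of its own, so the preceding layer provides the output activations and the LossLayer only applies the selected loss.

// Dense layer produces the final activations; the LossLayer applies MSE to them.
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .list()
        .layer(new DenseLayer.Builder().nIn(10).nOut(5).activation(Activation.TANH).build())
        .layer(new LossLayer.Builder(LossFunctions.LossFunction.MSE)
                .activation(Activation.IDENTITY)
                .build())
        .build();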
 
Example 5
Source File: BNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientBNWithCNNandSubsamplingCompGraph() {
        //Parameterized test, testing combinations of:
        // (a) activation function
        // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
        // (c) Loss function (with specified output activations)
        // (d) l1 and l2 values
        Activation[] activFns = {Activation.TANH, Activation.IDENTITY};
        boolean doLearningFirst = true;

        LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD};
        Activation[] outputActivations = {Activation.SOFTMAX}; //i.e., lossFunctions[i] used with outputActivations[i] here

        double[] l2vals = {0.0, 0.1};
        double[] l1vals = {0.0, 0.2}; //i.e., use l2vals[j] with l1vals[j]

        Nd4j.getRandom().setSeed(12345);
        int minibatch = 10;
        int depth = 2;
        int hw = 5;
        int nOut = 3;
        INDArray input = Nd4j.rand(new int[]{minibatch, depth, hw, hw});
        INDArray labels = Nd4j.zeros(minibatch, nOut);
        Random r = new Random(12345);
        for (int i = 0; i < minibatch; i++) {
            labels.putScalar(i, r.nextInt(nOut), 1.0);
        }

        DataSet ds = new DataSet(input, labels);

        for (boolean useLogStd : new boolean[]{true, false}) {
            for (Activation afn : activFns) {
                for (int i = 0; i < lossFunctions.length; i++) {
                    for (int j = 0; j < l2vals.length; j++) {
                        LossFunctions.LossFunction lf = lossFunctions[i];
                        Activation outputActivation = outputActivations[i];

                        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
                                .dataType(DataType.DOUBLE)
                                .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
                                .updater(new NoOp())
                                .dist(new UniformDistribution(-2, 2)).seed(12345L).graphBuilder()
                                .addInputs("in")
                                .addLayer("0", new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3)
                                        .activation(afn).build(), "in")
                                .addLayer("1", new BatchNormalization.Builder().useLogStd(useLogStd).build(), "0")
                                .addLayer("2", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                                        .kernelSize(2, 2).stride(1, 1).build(), "1")
                                .addLayer("3", new BatchNormalization.Builder().useLogStd(useLogStd).build(), "2")
                                .addLayer("4", new ActivationLayer.Builder().activation(afn).build(), "3")
                                .addLayer("5", new OutputLayer.Builder(lf).activation(outputActivation)
                                        .nOut(nOut).build(), "4")
                                .setOutputs("5").setInputTypes(InputType.convolutional(hw, hw, depth))
                                .build();

                        ComputationGraph net = new ComputationGraph(conf);
                        net.init();
                        String name = new Object() {
                        }.getClass().getEnclosingMethod().getName();

                        if (doLearningFirst) {
                            //Run a number of iterations of learning
                            net.setInput(0, ds.getFeatures());
                            net.setLabels(ds.getLabels());
                            net.computeGradientAndScore();
                            double scoreBefore = net.score();
                            for (int k = 0; k < 20; k++)
                                net.fit(ds);
                            net.computeGradientAndScore();
                            double scoreAfter = net.score();
                            //Can't test in 'characteristic mode of operation' if not learning
                            String msg = name
                                    + " - score did not (sufficiently) decrease during learning - activationFn="
                                    + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                    + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
                                    + ", scoreAfter=" + scoreAfter + ")";
                            assertTrue(msg, scoreAfter < 0.9 * scoreBefore);
                        }

                        System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf
                                + ", outputActivation=" + outputActivation + ", doLearningFirst="
                                + doLearningFirst + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]);
//                        for (int k = 0; k < net.getNumLayers(); k++)
//                            System.out.println("Layer " + k + " # params: " + net.getLayer(k).numParams());

                        //Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc
                        //i.e., runningMean = decay * runningMean + (1-decay) * batchMean
                        //However, numerical gradient will be 0 as forward pass doesn't depend on this "parameter"
                        Set<String> excludeParams = new HashSet<>(Arrays.asList("1_mean", "1_var", "3_mean", "3_var", "1_log10stdev", "3_log10stdev"));
                        boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(net).inputs(new INDArray[]{input})
                                .labels(new INDArray[]{labels}).excludeParams(excludeParams));

                        assertTrue(gradOK);
                        TestUtils.testModelSerialization(net);
                    }
                }
            }
        }
    }
 
Example 6
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientCNNMLN() {
        if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format...
            return;

        //Parameterized test, testing combinations of:
        // (a) activation function
        // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
        // (c) Loss function (with specified output activations)
        Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
        boolean[] characteristic = {false, true}; //If true: run some backprop steps first

        LossFunctions.LossFunction[] lossFunctions =
                {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here

        DataSet ds = new IrisDataSetIterator(150, 150).next();
        ds.normalizeZeroMeanZeroUnitVariance();
        INDArray input = ds.getFeatures();
        INDArray labels = ds.getLabels();

        for (Activation afn : activFns) {
            for (boolean doLearningFirst : characteristic) {
                for (int i = 0; i < lossFunctions.length; i++) {
                    LossFunctions.LossFunction lf = lossFunctions[i];
                    Activation outputActivation = outputActivations[i];

                    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                            .dataType(DataType.DOUBLE)
                            .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp())
                            .weightInit(WeightInit.XAVIER).seed(12345L).list()
                            .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn).build())
                            .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build())
                            .setInputType(InputType.convolutionalFlat(1, 4, 1));

                    MultiLayerConfiguration conf = builder.build();

                    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                    mln.init();
                    String name = new Object() {
                    }.getClass().getEnclosingMethod().getName();

                    if (doLearningFirst) {
                        //Run a number of iterations of learning
                        mln.setInput(ds.getFeatures());
                        mln.setLabels(ds.getLabels());
                        mln.computeGradientAndScore();
                        double scoreBefore = mln.score();
                        for (int j = 0; j < 10; j++)
                            mln.fit(ds);
                        mln.computeGradientAndScore();
                        double scoreAfter = mln.score();
                        //Can't test in 'characteristic mode of operation' if not learning
                        String msg = name + " - score did not (sufficiently) decrease during learning - activationFn="
                                + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
                                + ", scoreAfter=" + scoreAfter + ")";
                        assertTrue(msg, scoreAfter < 0.9 * scoreBefore);
                    }

                    if (PRINT_RESULTS) {
                        System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation="
                                + outputActivation + ", doLearningFirst=" + doLearningFirst);
//                        for (int j = 0; j < mln.getnLayers(); j++)
//                            System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                    }

                    boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                            DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                    assertTrue(gradOK);
                    TestUtils.testModelSerialization(mln);
                }
            }
        }
    }
 
Example 7
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientCNNL1L2MLN() {
        if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format...
            return;

        //Parameterized test, testing combinations of:
        // (a) activation function
        // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
        // (c) Loss function (with specified output activations)

        DataSet ds = new IrisDataSetIterator(150, 150).next();
        ds.normalizeZeroMeanZeroUnitVariance();
        INDArray input = ds.getFeatures();
        INDArray labels = ds.getLabels();

        //use l2vals[i] with l1vals[i]
        double[] l2vals = {0.4, 0.0, 0.4, 0.4};
        double[] l1vals = {0.0, 0.0, 0.5, 0.0};
        double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
        double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
        Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS};
        boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first

        LossFunctions.LossFunction[] lossFunctions =
                {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here

        for( int i=0; i<l2vals.length; i++ ){
            Activation afn = activFns[i];
            boolean doLearningFirst = characteristic[i];
            LossFunctions.LossFunction lf = lossFunctions[i];
            Activation outputActivation = outputActivations[i];
            double l2 = l2vals[i];
            double l1 = l1vals[i];

            MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                    .dataType(DataType.DOUBLE)
                    .l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i])
                    .optimizationAlgo(
                            OptimizationAlgorithm.CONJUGATE_GRADIENT)
                    .seed(12345L).list()
                    .layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6)
                            .weightInit(WeightInit.XAVIER).activation(afn)
                            .updater(new NoOp()).build())
                    .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
                            .weightInit(WeightInit.XAVIER).updater(new NoOp()).build())

                    .setInputType(InputType.convolutionalFlat(1, 4, 1));

            MultiLayerConfiguration conf = builder.build();

            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
            mln.init();
            String testName = new Object() {
            }.getClass().getEnclosingMethod().getName();

            if (doLearningFirst) {
                //Run a number of iterations of learning
                mln.setInput(ds.getFeatures());
                mln.setLabels(ds.getLabels());
                mln.computeGradientAndScore();
                double scoreBefore = mln.score();
                for (int j = 0; j < 10; j++)
                    mln.fit(ds);
                mln.computeGradientAndScore();
                double scoreAfter = mln.score();
                //Can't test in 'characteristic mode of operation' if not learning
                String msg = testName
                        + "- score did not (sufficiently) decrease during learning - activationFn="
                        + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                        + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
                        + ", scoreAfter=" + scoreAfter + ")";
                assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
            }

            if (PRINT_RESULTS) {
                System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf
                        + ", outputActivation=" + outputActivation + ", doLearningFirst="
                        + doLearningFirst);
//                for (int j = 0; j < mln.getnLayers(); j++)
//                    System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
            }

            boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

            assertTrue(gradOK);
            TestUtils.testModelSerialization(mln);
        }
    }
 
Example 8
Source File: CustomOutputLayer.java    From deeplearning4j with Apache License 2.0
public Builder(LossFunctions.LossFunction lossFunction) {
    super.lossFunction(lossFunction);
}
 
Example 9
Source File: MultiLayerNeuralNetConfigurationTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testInvalidOutputLayer(){
    /*
    Test case (invalid configs)
    1. nOut=1 + softmax
    2. mcxent + tanh
    3. xent + softmax
    4. xent + relu
    5. mcxent + sigmoid
     */

    LossFunctions.LossFunction[] lf = new LossFunctions.LossFunction[]{
            LossFunctions.LossFunction.MCXENT, LossFunctions.LossFunction.MCXENT, LossFunctions.LossFunction.XENT,
            LossFunctions.LossFunction.XENT, LossFunctions.LossFunction.MCXENT};
    int[] nOut = new int[]{1, 3, 3, 3, 3};
    Activation[] activations = new Activation[]{Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.RELU, Activation.SIGMOID};
    for( int i=0; i<lf.length; i++ ){
        for(boolean lossLayer : new boolean[]{false, true}) {
            for (boolean validate : new boolean[]{true, false}) {
                String s = "nOut=" + nOut[i] + ",lossFn=" + lf[i] + ",lossLayer=" + lossLayer + ",validate=" + validate;
                if(nOut[i] == 1 && lossLayer)
                    continue;   //nOut is not available in a loss layer, so it can't be expected to detect this case
                try {
                    new NeuralNetConfiguration.Builder()
                            .list()
                            .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
                            .layer(!lossLayer ? new OutputLayer.Builder().nIn(10).nOut(nOut[i]).activation(activations[i]).lossFunction(lf[i]).build()
                                            : new LossLayer.Builder().activation(activations[i]).lossFunction(lf[i]).build())
                            .validateOutputLayerConfig(validate)
                            .build();
                    if (validate) {
                        fail("Expected exception: " + s);
                    }
                } catch (DL4JInvalidConfigException e) {
                    if (validate) {
                        assertTrue(s, e.getMessage().toLowerCase().contains("invalid output"));
                    } else {
                        fail("Validation should not be enabled");
                    }
                }
            }
        }
    }
}
 
Example 10
Source File: ComputationGraphConfigurationTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testInvalidOutputLayer(){
    /*
    Test case (invalid configs)
    1. nOut=1 + softmax
    2. mcxent + tanh
    3. xent + softmax
    4. xent + relu
    5. mcxent + sigmoid
     */

    LossFunctions.LossFunction[] lf = new LossFunctions.LossFunction[]{
            LossFunctions.LossFunction.MCXENT, LossFunctions.LossFunction.MCXENT, LossFunctions.LossFunction.XENT,
            LossFunctions.LossFunction.XENT, LossFunctions.LossFunction.MCXENT};
    int[] nOut = new int[]{1, 3, 3, 3, 3};
    Activation[] activations = new Activation[]{Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.RELU, Activation.SIGMOID};
    for( int i=0; i<lf.length; i++ ){
        for(boolean lossLayer : new boolean[]{false, true}) {
            for (boolean validate : new boolean[]{true, false}) {
                String s = "nOut=" + nOut[i] + ",lossFn=" + lf[i] + ",lossLayer=" + lossLayer + ",validate=" + validate;
                if(nOut[i] == 1 && lossLayer)
                    continue;   //nOut is not available in a loss layer, so it can't be expected to detect this case
                try {
                    new NeuralNetConfiguration.Builder()
                            .graphBuilder()
                            .addInputs("in")
                            .layer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in")
                            .layer("1",
                                    !lossLayer ? new OutputLayer.Builder().nIn(10).nOut(nOut[i]).activation(activations[i]).lossFunction(lf[i]).build()
                                            : new LossLayer.Builder().activation(activations[i]).lossFunction(lf[i]).build(), "0")
                            .setOutputs("1")
                            .validateOutputLayerConfig(validate)
                            .build();
                    if (validate) {
                        fail("Expected exception: " + s);
                    }
                } catch (DL4JInvalidConfigException e) {
                    if (validate) {
                        assertTrue(s, e.getMessage().toLowerCase().contains("invalid output"));
                    } else {
                        fail("Validation should not be enabled");
                    }
                }
            }
        }
    }
}
 
Example 11
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testGradientCNNMLN() {
    //Parameterized test, testing combinations of:
    // (a) activation function
    // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
    // (c) Loss function (with specified output activations)
    Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
    boolean[] characteristic = {false, true}; //If true: run some backprop steps first

    LossFunctions.LossFunction[] lossFunctions =
            {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
    Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here

    DataSet ds = new IrisDataSetIterator(150, 150).next();
    ds.normalizeZeroMeanZeroUnitVariance();
    INDArray input = ds.getFeatures();
    INDArray labels = ds.getLabels();

    for (Activation afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                LossFunctions.LossFunction lf = lossFunctions[i];
                Activation outputActivation = outputActivations[i];

                MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                        .dataType(DataType.DOUBLE)
                        .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp())
                        .weightInit(WeightInit.XAVIER).seed(12345L).list()
                        .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn)
                                .cudnnAllowFallback(false)
                                .build())
                        .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build())
                        .setInputType(InputType.convolutionalFlat(1, 4, 1));

                MultiLayerConfiguration conf = builder.build();

                MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                mln.init();
                String name = new Object() {
                }.getClass().getEnclosingMethod().getName();

                if (doLearningFirst) {
                    //Run a number of iterations of learning
                    mln.setInput(ds.getFeatures());
                    mln.setLabels(ds.getLabels());
                    mln.computeGradientAndScore();
                    double scoreBefore = mln.score();
                    for (int j = 0; j < 10; j++)
                        mln.fit(ds);
                    mln.computeGradientAndScore();
                    double scoreAfter = mln.score();
                    //Can't test in 'characteristic mode of operation' if not learning
                    String msg = name + " - score did not (sufficiently) decrease during learning - activationFn="
                            + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                            + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
                            + ", scoreAfter=" + scoreAfter + ")";
                    assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
                }

                if (PRINT_RESULTS) {
                    System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation="
                            + outputActivation + ", doLearningFirst=" + doLearningFirst);
                }

                boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                assertTrue(gradOK);
                TestUtils.testModelSerialization(mln);
            }
        }
    }
}
 
Example 12
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientCNNL1L2MLN() {
        //Parameterized test, testing combinations of:
        // (a) activation function
        // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
        // (c) Loss function (with specified output activations)

        DataSet ds = new IrisDataSetIterator(150, 150).next();
        ds.normalizeZeroMeanZeroUnitVariance();
        INDArray input = ds.getFeatures();
        INDArray labels = ds.getLabels();

        //use l2vals[i] with l1vals[i]
        double[] l2vals = {0.4, 0.0, 0.4, 0.4};
        double[] l1vals = {0.0, 0.0, 0.5, 0.0};
        double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
        double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
        Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS};
        boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first

        LossFunctions.LossFunction[] lossFunctions =
                {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here

        for( int i=0; i<l2vals.length; i++ ){
            Activation afn = activFns[i];
            boolean doLearningFirst = characteristic[i];
            LossFunctions.LossFunction lf = lossFunctions[i];
            Activation outputActivation = outputActivations[i];
            double l2 = l2vals[i];
            double l1 = l1vals[i];

            MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                    .dataType(DataType.DOUBLE)
                    .l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i])
                    .optimizationAlgo(
                            OptimizationAlgorithm.CONJUGATE_GRADIENT)
                    .seed(12345L).list()
                    .layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6)
                            .weightInit(WeightInit.XAVIER).activation(afn)
                            .updater(new NoOp()).build())
                    .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
                            .weightInit(WeightInit.XAVIER).updater(new NoOp()).build())

                    .setInputType(InputType.convolutionalFlat(1, 4, 1));

            MultiLayerConfiguration conf = builder.build();

            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
            mln.init();
            String testName = new Object() {
            }.getClass().getEnclosingMethod().getName();

            if (doLearningFirst) {
                //Run a number of iterations of learning
                mln.setInput(ds.getFeatures());
                mln.setLabels(ds.getLabels());
                mln.computeGradientAndScore();
                double scoreBefore = mln.score();
                for (int j = 0; j < 10; j++)
                    mln.fit(ds);
                mln.computeGradientAndScore();
                double scoreAfter = mln.score();
                //Can't test in 'characteristic mode of operation' if not learning
                String msg = testName
                        + "- score did not (sufficiently) decrease during learning - activationFn="
                        + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                        + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
                        + ", scoreAfter=" + scoreAfter + ")";
                assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
            }

            if (PRINT_RESULTS) {
                System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf
                        + ", outputActivation=" + outputActivation + ", doLearningFirst="
                        + doLearningFirst);
//                for (int j = 0; j < mln.getnLayers(); j++)
//                    System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
            }

            boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

            assertTrue(gradOK);
            TestUtils.testModelSerialization(mln);
        }
    }
 
Example 13
Source File: LossFunctionParameterSpaceAdapter.java    From deeplearning4j with Apache License 2.0
public LossFunctionParameterSpaceAdapter(
                @JsonProperty("lossFunction") ParameterSpace<LossFunctions.LossFunction> lossFunction) {
    this.lossFunction = lossFunction;
}
 
Example 14
Source File: LossFunctionParameterSpaceAdapter.java    From deeplearning4j with Apache License 2.0
@Override
protected ILossFunction convertValue(LossFunctions.LossFunction from) {
    return from.getILossFunction();
}
 
Example 15
Source File: LossFunctionParameterSpaceAdapter.java    From deeplearning4j with Apache License 2.0
@Override
protected ParameterSpace<LossFunctions.LossFunction> underlying() {
    return lossFunction;
}
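Taken together, Examples 13-15 form an Arbiter adapter that exposes a ParameterSpace of LossFunction enum values as a ParameterSpace of ILossFunction: convertValue() simply calls getILossFunction() on each sampled enum. A hedged sketch of the idea follows (DiscreteParameterSpace is assumed from Arbiter, and in practice the adapter is usually constructed for you by MultiLayerSpace).

// Candidate losses to search over during hyperparameter optimization.
ParameterSpace<LossFunctions.LossFunction> lossSpace = new DiscreteParameterSpace<>(
        LossFunctions.LossFunction.MCXENT, LossFunctions.LossFunction.MSE);

// Adapter view: sampled enum values are converted to ILossFunction instances
// via LossFunction.getILossFunction() (see convertValue above).
ParameterSpace<ILossFunction> adapted = new LossFunctionParameterSpaceAdapter(lossSpace);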
 
Example 16
Source File: TestScoreFunctions.java    From deeplearning4j with Apache License 2.0
@Test
public void testROCScoreFunctions() throws Exception {


    for (boolean auc : new boolean[]{true, false}) {
        for (ROCScoreFunction.ROCType rocType : ROCScoreFunction.ROCType.values()) {
            String msg = (auc ? "AUC" : "AUPRC") + " - " + rocType;
            log.info("Starting: " + msg);

            ParameterSpace<Double> lr = new ContinuousParameterSpace(1e-5, 1e-3);

            int nOut = (rocType == ROCScoreFunction.ROCType.ROC ? 2 : 10);
            LossFunctions.LossFunction lf = (rocType == ROCScoreFunction.ROCType.BINARY ?
                    LossFunctions.LossFunction.XENT : LossFunctions.LossFunction.MCXENT);
            Activation a = (rocType == ROCScoreFunction.ROCType.BINARY ? Activation.SIGMOID : Activation.SOFTMAX);
            MultiLayerSpace mls = new MultiLayerSpace.Builder()
                    .trainingWorkspaceMode(WorkspaceMode.NONE)
                    .inferenceWorkspaceMode(WorkspaceMode.NONE)
                    .updater(new AdamSpace(lr))
                    .weightInit(WeightInit.XAVIER)
                    .layer(new OutputLayerSpace.Builder().nIn(784).nOut(nOut)
                            .activation(a)
                            .lossFunction(lf).build())
                    .build();

            CandidateGenerator cg = new RandomSearchGenerator(mls);
            ResultSaver rs = new InMemoryResultSaver();
            ScoreFunction sf = new ROCScoreFunction(rocType, (auc ? ROCScoreFunction.Metric.AUC : ROCScoreFunction.Metric.AUPRC));


            OptimizationConfiguration oc = new OptimizationConfiguration.Builder()
                    .candidateGenerator(cg)
                    .dataProvider(new DP(rocType))
                    .modelSaver(rs)
                    .scoreFunction(sf)
                    .terminationConditions(new MaxCandidatesCondition(3))
                    .rngSeed(12345)
                    .build();

            IOptimizationRunner runner = new LocalOptimizationRunner(oc, new MultiLayerNetworkTaskCreator());
            runner.execute();

            List<ResultReference> list = runner.getResults();

            for (ResultReference rr : list) {
                DataSetIterator testIter = new MnistDataSetIterator(4, 16, false, false, false, 12345);
                testIter.setPreProcessor(new PreProc(rocType));

                OptimizationResult or = rr.getResult();
                MultiLayerNetwork net = (MultiLayerNetwork) or.getResultReference().getResultModel();

                double expScore;
                switch (rocType){
                    case ROC:
                        if(auc){
                            expScore = net.doEvaluation(testIter, new ROC())[0].calculateAUC();
                        } else {
                            expScore = net.doEvaluation(testIter, new ROC())[0].calculateAUCPR();
                        }
                        break;
                    case BINARY:
                        if(auc){
                            expScore = net.doEvaluation(testIter, new ROCBinary())[0].calculateAverageAuc();
                        } else {
                            expScore = net.doEvaluation(testIter, new ROCBinary())[0].calculateAverageAUCPR();
                        }
                        break;
                    case MULTICLASS:
                        if(auc){
                            expScore = net.doEvaluation(testIter, new ROCMultiClass())[0].calculateAverageAUC();
                        } else {
                            expScore = net.doEvaluation(testIter, new ROCMultiClass())[0].calculateAverageAUCPR();
                        }
                        break;
                    default:
                        throw new RuntimeException();
                }


                DataSetIterator iter = new MnistDataSetIterator(4, 16, false, false, false, 12345);
                iter.setPreProcessor(new PreProc(rocType));

                assertEquals(msg, expScore, or.getScore(), 1e-4);
            }
        }
    }
}