Java Code Examples for org.nd4j.linalg.activations.Activation#TANH

The following examples show how to use org.nd4j.linalg.activations.Activation#TANH. You can go to the original project or source file by following the link above each example; all of the examples below come from the deeplearning4j project (Apache License 2.0).
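Before the project examples, here is a minimal illustrative sketch (not taken from any of the files below) of the typical use of Activation.TANH: the enum value is passed to a layer builder when configuring a network. The layer sizes are arbitrary example values, and the fragment would normally sit inside a configuration method like the ones shown below.

// Minimal sketch: Activation.TANH selects tanh as the hidden layer's activation function.
// Layer sizes (4 in, 3 hidden, 3 out) are arbitrary illustration values.
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .list()
        .layer(new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.TANH).build())
        .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                .activation(Activation.SOFTMAX).nIn(3).nOut(3).build())
        .build();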
Example 1
Source File: KerasActivationUtils.java    From deeplearning4j with Apache License 2.0
/**
 * Map Keras to DL4J activation functions.
 *
 * @param conf Keras layer configuration
 * @param kerasActivation String containing Keras activation function name
 * @return Activation enum value containing DL4J activation function name
 */
public static Activation mapToActivation(String kerasActivation, KerasLayerConfiguration conf)
        throws UnsupportedKerasConfigurationException {
    Activation dl4jActivation;
    if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SOFTMAX())) {
        dl4jActivation = Activation.SOFTMAX;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SOFTPLUS())) {
        dl4jActivation = Activation.SOFTPLUS;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SOFTSIGN())) {
        dl4jActivation = Activation.SOFTSIGN;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_RELU())) {
        dl4jActivation = Activation.RELU;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_RELU6())) {
        dl4jActivation = Activation.RELU6;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_ELU())) {
        dl4jActivation = Activation.ELU;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SELU())) {
        dl4jActivation = Activation.SELU;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_TANH())) {
        dl4jActivation = Activation.TANH;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SIGMOID())) {
        dl4jActivation = Activation.SIGMOID;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_HARD_SIGMOID())) {
        dl4jActivation = Activation.HARDSIGMOID;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_LINEAR())) {
        dl4jActivation = Activation.IDENTITY;
    } else if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SWISH())) {
        dl4jActivation = Activation.SWISH;
    } else {
        throw new UnsupportedKerasConfigurationException(
                "Unknown Keras activation function " + kerasActivation);
    }
    return dl4jActivation;
}
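As a hedged usage sketch (not part of KerasActivationUtils), the helper below shows how the mapping might be invoked to resolve the Keras tanh name. The wrapper method name is hypothetical, and the KerasLayerConfiguration instance is assumed to be supplied by the surrounding Keras model import code.

// Hypothetical helper, for illustration only: resolves the Keras "tanh" name to the DL4J enum.
// The KerasLayerConfiguration instance is assumed to come from the Keras import code.
public static Activation tanhFromKeras(KerasLayerConfiguration conf)
        throws UnsupportedKerasConfigurationException {
    return KerasActivationUtils.mapToActivation(conf.getKERAS_ACTIVATION_TANH(), conf); // returns Activation.TANH
}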
 
Example 2
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testCnnWithSubsamplingV2() {
    Nd4j.getRandom().setSeed(12345);
    int nOut = 4;

    int[] minibatchSizes = {1, 3};
    int width = 5;
    int height = 5;
    int inputDepth = 1;

    int[] kernel = {2, 2};
    int[] stride = {1, 1};
    int[] padding = {0, 0};
    int pNorm = 3;

    Activation[] activations = {Activation.SIGMOID, Activation.TANH};
    SubsamplingLayer.PoolingType[] poolingTypes =
            new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX,
                    SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM};

    for (Activation afn : activations) {
        for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
            for (int minibatchSize : minibatchSizes) {
                INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
                INDArray labels = Nd4j.zeros(minibatchSize, nOut);
                for (int i = 0; i < minibatchSize; i++) {
                    labels.putScalar(new int[]{i, i % nOut}, 1.0);
                }

                MultiLayerConfiguration conf =
                        new NeuralNetConfiguration.Builder().updater(new NoOp())
                                .dataType(DataType.DOUBLE)
                                .dist(new NormalDistribution(0, 1))
                                .list().layer(0,
                                new ConvolutionLayer.Builder(kernel,
                                        stride, padding).nIn(inputDepth)
                                        .cudnnAllowFallback(false)
                                        .nOut(3).build())//output: (5-2+0)/1+1 = 4
                                .layer(1, new SubsamplingLayer.Builder(poolingType)
                                        .kernelSize(kernel).stride(stride).padding(padding)
                                        .cudnnAllowFallback(false)
                                        .pnorm(pNorm).build()) //output: (4-2+0)/1+1 =3 -> 3x3x3
                                .layer(2, new ConvolutionLayer.Builder(kernel, stride, padding)
                                        .cudnnAllowFallback(false)
                                        .nIn(3).nOut(2).build()) //Output: (3-2+0)/1+1 = 2
                                .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                        .activation(Activation.SOFTMAX).nIn(2 * 2 * 2)
                                        .nOut(4).build())
                                .setInputType(InputType.convolutionalFlat(height, width,
                                        inputDepth))
                                .build();

                MultiLayerNetwork net = new MultiLayerNetwork(conf);
                net.init();

                String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn="
                        + afn;
                System.out.println(msg);

                boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                assertTrue(msg, gradOK);

                TestUtils.testModelSerialization(net);
            }
        }
    }
}
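The output-size comments in the layer builders above ("(5-2+0)/1+1 = 4" and so on) follow the usual formula for Truncate ('valid') convolutions and subsampling: out = (in - kernel + 2*padding)/stride + 1. The helper below is an illustrative sketch of that arithmetic, not code from the test.

// Illustrative only: the output-size arithmetic behind the comments in the test above.
static int convOutputSize(int in, int kernel, int padding, int stride) {
    return (in - kernel + 2 * padding) / stride + 1;
}
// For the 5x5 input: convOutputSize(5, 2, 0, 1) == 4, then 3 after pooling, then 2 after the second conv.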
 
Example 3
Source File: CNN3DGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testDeconv3d() {
    Nd4j.getRandom().setSeed(12345);
    // Note: we checked this with a variety of parameters, but it takes a lot of time.
    int[] depths = {8, 8, 9};
    int[] heights = {8, 9, 9};
    int[] widths = {8, 8, 9};


    int[][] kernels = {{2, 2, 2}, {3, 3, 3}, {2, 3, 2}};
    int[][] strides = {{1, 1, 1}, {1, 1, 1}, {2, 2, 2}};

    Activation[] activations = {Activation.SIGMOID, Activation.TANH, Activation.IDENTITY};

    ConvolutionMode[] modes = {ConvolutionMode.Truncate, ConvolutionMode.Same, ConvolutionMode.Same};
    int[] mbs = {1, 3, 2};
    Convolution3D.DataFormat[] dataFormats = new Convolution3D.DataFormat[]{Convolution3D.DataFormat.NCDHW, Convolution3D.DataFormat.NDHWC, Convolution3D.DataFormat.NCDHW};

    int convNIn = 2;
    int finalNOut = 2;
    int[] deconvOut = {2, 3, 4};

    for (int i = 0; i < activations.length; i++) {
        Activation afn = activations[i];
        int miniBatchSize = mbs[i];
        int depth = depths[i];
        int height = heights[i];
        int width = widths[i];
        ConvolutionMode mode = modes[i];
        int[] kernel = kernels[i];
        int[] stride = strides[i];
        Convolution3D.DataFormat df = dataFormats[i];
        int dOut = deconvOut[i];

        INDArray input;
        if (df == Convolution3D.DataFormat.NDHWC) {
            input = Nd4j.rand(new int[]{miniBatchSize, depth, height, width, convNIn});
        } else {
            input = Nd4j.rand(new int[]{miniBatchSize, convNIn, depth, height, width});
        }
        INDArray labels = Nd4j.zeros(miniBatchSize, finalNOut);
        for (int j = 0; j < miniBatchSize; j++) {
            labels.putScalar(new int[]{j, j % finalNOut}, 1.0);
        }

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .dataType(DataType.DOUBLE)
                .updater(new NoOp())
                .weightInit(new NormalDistribution(0, 0.1))
                .list()
                .layer(0, new Convolution3D.Builder().activation(afn).kernelSize(kernel)
                        .stride(stride).nIn(convNIn).nOut(dOut).hasBias(false)
                        .convolutionMode(mode).dataFormat(df)
                        .build())
                .layer(1, new Deconvolution3D.Builder().activation(afn).kernelSize(kernel)
                        .stride(stride).nOut(dOut).hasBias(false)
                        .convolutionMode(mode).dataFormat(df)
                        .build())
                .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nOut(finalNOut).build())
                .setInputType(InputType.convolutional3D(df, depth, height, width, convNIn)).build();

        String json = conf.toJson();
        MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json);
        assertEquals(conf, c2);

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        String msg = "DataFormat = " + df + ", minibatch size = " + miniBatchSize + ", activationFn=" + afn
                + ", kernel = " + Arrays.toString(kernel) + ", stride = "
                + Arrays.toString(stride) + ", mode = " + mode.toString()
                + ", input depth " + depth + ", input height " + height
                + ", input width " + width;

        if (PRINT_RESULTS) {
            log.info(msg);
        }

        boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(input)
                .labels(labels).subset(true).maxPerParam(64));

        assertTrue(msg, gradOK);

        TestUtils.testModelSerialization(net);
    }
}
 
Example 4
Source File: VaeGradientCheckTests.java    From deeplearning4j with Apache License 2.0
@Test
    public void testVaeAsMLP() {
        //Post pre-training: a VAE can be used as an MLP, by taking the mean value from p(z|x) as the output
        //This gradient check tests this part

        Activation[] activFns = {Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH};

        LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MCXENT, LossFunction.MSE, LossFunction.MSE, LossFunction.MCXENT, LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.SOFTMAX, Activation.TANH, Activation.TANH, Activation.SOFTMAX, Activation.TANH};

        //use l2vals[i] with l1vals[i]
        double[] l2vals = {0.4, 0.0, 0.4, 0.4, 0.0, 0.0};
        double[] l1vals = {0.0, 0.0, 0.5, 0.0, 0.0, 0.5};
        double[] biasL2 = {0.0, 0.0, 0.0, 0.2, 0.0, 0.4};
        double[] biasL1 = {0.0, 0.0, 0.6, 0.0, 0.0, 0.0};

        int[][] encoderLayerSizes = new int[][] {{5}, {5}, {5, 6}, {5, 6}, {5}, {5, 6}};
        int[][] decoderLayerSizes = new int[][] {{6}, {7, 8}, {6}, {7, 8}, {6}, {7, 8}};

        int[] minibatches = new int[]{1,5,4,3,1,4};

        Nd4j.getRandom().setSeed(12345);
        for( int i=0; i<activFns.length; i++ ){
            LossFunction lf = lossFunctions[i];
            Activation outputActivation = outputActivations[i];
            double l2 = l2vals[i];
            double l1 = l1vals[i];
            int[] encoderSizes = encoderLayerSizes[i];
            int[] decoderSizes = decoderLayerSizes[i];
            int minibatch = minibatches[i];
            INDArray input = Nd4j.rand(minibatch, 4);
            INDArray labels = Nd4j.create(minibatch, 3);
            for (int j = 0; j < minibatch; j++) {
                labels.putScalar(j, j % 3, 1.0);
            }
            Activation afn = activFns[i];

            MultiLayerConfiguration conf =
                    new NeuralNetConfiguration.Builder().l2(l2).l1(l1)
                            .dataType(DataType.DOUBLE)
                            .updater(new NoOp())
                            .l2Bias(biasL2[i]).l1Bias(biasL1[i])
                            .updater(new NoOp()).seed(12345L).list()
                            .layer(0, new VariationalAutoencoder.Builder().nIn(4)
                                    .nOut(3).encoderLayerSizes(encoderSizes)
                                    .decoderLayerSizes(decoderSizes)

                                    .dist(new NormalDistribution(0, 1))
                                    .activation(afn)
                                    .build())
                            .layer(1, new OutputLayer.Builder(lf)
                                    .activation(outputActivation).nIn(3).nOut(3)

                                    .dist(new NormalDistribution(0, 1))
                                    .build())
                            .build();

            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
            mln.init();

            String msg = "testVaeAsMLP() - activationFn=" + afn + ", lossFn=" + lf
                    + ", outputActivation=" + outputActivation + ", encLayerSizes = "
                    + Arrays.toString(encoderSizes) + ", decLayerSizes = "
                    + Arrays.toString(decoderSizes) + ", l2=" + l2 + ", l1=" + l1;
            if (PRINT_RESULTS) {
                System.out.println(msg);
//                for (int j = 0; j < mln.getnLayers(); j++)
//                    System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
            }

            boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input,
                    labels);
            assertTrue(msg, gradOK);
            TestUtils.testModelSerialization(mln);
        }
    }
 
Example 5
Source File: JsonTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testJsonLossFunctions() {

    ILossFunction[] lossFunctions = new ILossFunction[] {new LossBinaryXENT(), new LossBinaryXENT(),
                    new LossCosineProximity(), new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(),
                    new LossL1(), new LossL2(), new LossL2(), new LossMAE(), new LossMAE(), new LossMAPE(),
                    new LossMAPE(), new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(),
                    new LossNegativeLogLikelihood(), new LossNegativeLogLikelihood(), new LossPoisson(),
                    new LossSquaredHinge(), new LossFMeasure(), new LossFMeasure(2.0)};

    Activation[] outputActivationFn = new Activation[] {Activation.SIGMOID, //xent
                    Activation.SIGMOID, //xent
                    Activation.TANH, //cosine
                    Activation.TANH, //hinge -> trying to predict 1 or -1
                    Activation.SIGMOID, //kld -> probab so should be between 0 and 1
                    Activation.SOFTMAX, //kld + softmax
                    Activation.TANH, //l1
                    Activation.SOFTMAX, //l1 + softmax
                    Activation.TANH, //l2
                    Activation.SOFTMAX, //l2 + softmax
                    Activation.IDENTITY, //mae
                    Activation.SOFTMAX, //mae + softmax
                    Activation.IDENTITY, //mape
                    Activation.SOFTMAX, //mape + softmax
                    Activation.SOFTMAX, //mcxent
                    Activation.IDENTITY, //mse
                    Activation.SOFTMAX, //mse + softmax
                    Activation.SIGMOID, //msle  -   requires positive labels/activations due to log
                    Activation.SOFTMAX, //msle + softmax
                    Activation.SIGMOID, //nll
                    Activation.SOFTMAX, //nll + softmax
                    Activation.SIGMOID, //poisson - requires positive predictions due to log... not sure if this is the best option
                    Activation.TANH, //squared hinge
                    Activation.SIGMOID, //f-measure (binary, single sigmoid output)
                    Activation.SOFTMAX //f-measure (binary, 2-label softmax output)
    };

    int[] nOut = new int[] {1, //xent
                    3, //xent
                    5, //cosine
                    3, //hinge
                    3, //kld
                    3, //kld + softmax
                    3, //l1
                    3, //l1 + softmax
                    3, //l2
                    3, //l2 + softmax
                    3, //mae
                    3, //mae + softmax
                    3, //mape
                    3, //mape + softmax
                    3, //mcxent
                    3, //mse
                    3, //mse + softmax
                    3, //msle
                    3, //msle + softmax
                    3, //nll
                    3, //nll + softmax
                    3, //poisson
                    3, //squared hinge
                    1, //f-measure (binary, single sigmoid output)
                    2, //f-measure (binary, 2-label softmax output)
    };

    for (int i = 0; i < lossFunctions.length; i++) {

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(Updater.ADAM).list()
                        .layer(0, new DenseLayer.Builder().nIn(4).nOut(nOut[i]).activation(Activation.TANH).build())
                        .layer(1, new LossLayer.Builder().lossFunction(lossFunctions[i])
                                        .activation(outputActivationFn[i]).build())
                        .validateOutputLayerConfig(false).build();

        String json = conf.toJson();
        String yaml = conf.toYaml();

        MultiLayerConfiguration fromJson = MultiLayerConfiguration.fromJson(json);
        MultiLayerConfiguration fromYaml = MultiLayerConfiguration.fromYaml(yaml);

        assertEquals(conf, fromJson);
        assertEquals(conf, fromYaml);
    }
}
 
Example 6
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testCnnMultiLayer() {
    int nOut = 2;

    int[] minibatchSizes = {1, 2, 5};
    int width = 5;
    int height = 5;
    int[] inputDepths = {1, 2, 4};

    Activation[] activations = {Activation.SIGMOID, Activation.TANH};
    SubsamplingLayer.PoolingType[] poolingTypes = new SubsamplingLayer.PoolingType[]{
            SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG};

    Nd4j.getRandom().setSeed(12345);

    boolean nchw = format == CNN2DFormat.NCHW;

    for (int inputDepth : inputDepths) {
        for (Activation afn : activations) {
            for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
                for (int minibatchSize : minibatchSizes) {
                    long[] inShape = nchw ? new long[]{minibatchSize, inputDepth, height, width} : new long[]{minibatchSize, height, width, inputDepth};
                    INDArray input = Nd4j.rand(DataType.DOUBLE, inShape);

                    INDArray labels = Nd4j.zeros(minibatchSize, nOut);
                    for (int i = 0; i < minibatchSize; i++) {
                        labels.putScalar(new int[]{i, i % nOut}, 1.0);
                    }

                    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new NoOp())
                            .dataType(DataType.DOUBLE)
                            .activation(afn)
                            .list()
                            .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1)
                                    .padding(0, 0).nIn(inputDepth).nOut(2).build())//output: (5-2+0)/1+1 = 4
                            .layer(1, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(2, 2)
                                    .stride(1, 1).padding(0, 0).build()) //(4-2+0)/1+1 = 3
                            .layer(2, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(2, 2)
                                    .stride(1, 1).padding(0, 0).build()) //(3-2+0)/1+1 = 2
                            .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .activation(Activation.SOFTMAX).nIn(2 * 2 * 2).nOut(nOut)
                                    .build())
                            .setInputType(InputType.convolutional(height, width, inputDepth, format)).build();

                    assertEquals(ConvolutionMode.Truncate,
                            ((ConvolutionLayer) conf.getConf(0).getLayer()).getConvolutionMode());

                    MultiLayerNetwork net = new MultiLayerNetwork(conf);
                    net.init();

                    for (int i = 0; i < 4; i++) {
                        System.out.println("nParams, layer " + i + ": " + net.getLayer(i).numParams());
                    }

                    String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn="
                            + afn;
                    System.out.println(msg);

                    boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                            DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                    assertTrue(msg, gradOK);

                    TestUtils.testModelSerialization(net);
                }
            }
        }
    }
}
 
Example 7
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testCnnLocallyConnected2D() {
    int nOut = 3;
    int width = 5;
    int height = 5;

    Nd4j.getRandom().setSeed(12345);

    int[] inputDepths = new int[]{1, 2, 4};
    Activation[] activations = {Activation.SIGMOID, Activation.TANH, Activation.SOFTPLUS};
    int[] minibatch = {2, 1, 3};

    boolean nchw = format == CNN2DFormat.NCHW;

    for( int i=0; i<inputDepths.length; i++ ){
        int inputDepth = inputDepths[i];
        Activation afn = activations[i];
        int minibatchSize = minibatch[i];

        long[] inShape = nchw ? new long[]{minibatchSize, inputDepth, height, width} : new long[]{minibatchSize, height, width, inputDepth};
        INDArray input = Nd4j.rand(DataType.DOUBLE, inShape);
        INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new NoOp())
                .dataType(DataType.DOUBLE)
                .activation(afn)
                .list()
                .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1)
                        .padding(0, 0).nIn(inputDepth).nOut(2).build())//output: (5-2+0)/1+1 = 4
                .layer(1, new LocallyConnected2D.Builder().nIn(2).nOut(7).kernelSize(2, 2)
                        .setInputSize(4, 4).convolutionMode(ConvolutionMode.Strict).hasBias(false)
                        .stride(1, 1).padding(0, 0).build()) //(4-2+0)/1+1 = 3
                .layer(2, new ConvolutionLayer.Builder().nIn(7).nOut(2).kernelSize(2, 2)
                        .stride(1, 1).padding(0, 0).build()) //(3-2+0)/1+1 = 2
                .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(2 * 2 * 2).nOut(nOut)
                        .build())
                .setInputType(InputType.convolutional(height, width, inputDepth, format)).build();

        assertEquals(ConvolutionMode.Truncate,
                ((ConvolutionLayer) conf.getConf(0).getLayer()).getConvolutionMode());

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        String msg = "Minibatch=" + minibatchSize + ", activationFn="
                + afn;
        System.out.println(msg);

        boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

        assertTrue(msg, gradOK);

        TestUtils.testModelSerialization(net);
    }
}
 
Example 8
Source File: TestSameDiffDense.java    From deeplearning4j with Apache License 2.0
@Test
    public void testSameDiffDenseBackward() {
        int nIn = 3;
        int nOut = 4;

        for (boolean workspaces : new boolean[]{false, true}) {

            for (int minibatch : new int[]{5, 1}) {

                Activation[] afns = new Activation[]{
                        Activation.TANH,
                        Activation.SIGMOID,
                        Activation.ELU,
                        Activation.IDENTITY,
                        Activation.SOFTPLUS,
                        Activation.SOFTSIGN,
                        Activation.HARDTANH,
                        Activation.CUBE,
                        Activation.RELU
                };

                for (Activation a : afns) {
                    log.info("Starting test - " + a + " - minibatch " + minibatch + ", workspaces: " + workspaces);
                    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                            .trainingWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE)
                            .inferenceWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE)
                            .list()
                            .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut)
                                    .activation(a)
                                    .build())
                            .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX)
                                    .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                            .build();

                    MultiLayerNetwork netSD = new MultiLayerNetwork(conf);
                    netSD.init();

                    MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
                            .list()
                            .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build())
                            .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX)
                                    .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                            .build();

                    MultiLayerNetwork netStandard = new MultiLayerNetwork(conf2);
                    netStandard.init();

                    netSD.params().assign(netStandard.params());

                    //Check params:
                    assertEquals(netStandard.params(), netSD.params());
                    assertEquals(netStandard.paramTable(), netSD.paramTable());

                    INDArray in = Nd4j.rand(minibatch, nIn);
                    INDArray l = TestUtils.randomOneHot(minibatch, nOut, 12345);
                    netSD.setInput(in);
                    netStandard.setInput(in);
                    netSD.setLabels(l);
                    netStandard.setLabels(l);

                    netSD.computeGradientAndScore();
                    netStandard.computeGradientAndScore();

                    Gradient gSD = netSD.gradient();
                    Gradient gStd = netStandard.gradient();

                    Map<String, INDArray> m1 = gSD.gradientForVariable();
                    Map<String, INDArray> m2 = gStd.gradientForVariable();

                    assertEquals(m2.keySet(), m1.keySet());

                    for (String s : m1.keySet()) {
                        INDArray i1 = m1.get(s);
                        INDArray i2 = m2.get(s);

                        assertEquals(s, i2, i1);
                    }

                    assertEquals(gStd.gradient(), gSD.gradient());

                    //Sanity check: different minibatch size
                    in = Nd4j.rand(2 * minibatch, nIn);
                    l = TestUtils.randomOneHot(2 * minibatch, nOut, 12345);
                    netSD.setInput(in);
                    netStandard.setInput(in);
                    netSD.setLabels(l);
                    netStandard.setLabels(l);

                    netSD.computeGradientAndScore();
//                    netStandard.computeGradientAndScore();
//                    assertEquals(netStandard.gradient().gradient(), netSD.gradient().gradient());

                    //Sanity check on different minibatch sizes:
                    INDArray newIn = Nd4j.vstack(in, in);
                    INDArray outMbsd = netSD.output(newIn);
                    INDArray outMb = netStandard.output(newIn);
                    assertEquals(outMb, outMbsd);
                }
            }
        }
    }
 
Example 9
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testCnnWithSubsampling() {
        Nd4j.getRandom().setSeed(12345);
        int nOut = 4;

        int[] minibatchSizes = {1, 3};
        int width = 5;
        int height = 5;
        int inputDepth = 1;

        int[] kernel = {2, 2};
        int[] stride = {1, 1};
        int[] padding = {0, 0};
        int pnorm = 2;

        Activation[] activations = {Activation.SIGMOID, Activation.TANH};
        SubsamplingLayer.PoolingType[] poolingTypes =
                new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX,
                        SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM};

        boolean nchw = format == CNN2DFormat.NCHW;

        for (Activation afn : activations) {
            for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
                for (int minibatchSize : minibatchSizes) {
                    long[] inShape = nchw ? new long[]{minibatchSize, inputDepth, height, width} : new long[]{minibatchSize, height, width, inputDepth};
                    INDArray input = Nd4j.rand(DataType.DOUBLE, inShape);
                    INDArray labels = Nd4j.zeros(minibatchSize, nOut);
                    for (int i = 0; i < minibatchSize; i++) {
                        labels.putScalar(new int[]{i, i % nOut}, 1.0);
                    }

                    MultiLayerConfiguration conf =
                            new NeuralNetConfiguration.Builder().updater(new NoOp())
                                    .dataType(DataType.DOUBLE)
                                    .dist(new NormalDistribution(0, 1))
                                    .list().layer(0,
                                    new ConvolutionLayer.Builder(kernel,
                                            stride, padding).nIn(inputDepth)
                                            .nOut(3).build())//output: (5-2+0)/1+1 = 4
                                    .layer(1, new SubsamplingLayer.Builder(poolingType)
                                            .kernelSize(kernel).stride(stride).padding(padding)
                                            .pnorm(pnorm).build()) //output: (4-2+0)/1+1 =3 -> 3x3x3
                                    .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                            .activation(Activation.SOFTMAX).nIn(3 * 3 * 3)
                                            .nOut(4).build())
                                    .setInputType(InputType.convolutional(height, width, inputDepth, format))
                                    .build();

                    MultiLayerNetwork net = new MultiLayerNetwork(conf);
                    net.init();

                    String msg = format + " - poolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn="
                            + afn;

                    if (PRINT_RESULTS) {
                        System.out.println(msg);
//                        for (int j = 0; j < net.getnLayers(); j++)
//                            System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams());
                    }

                    boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                            DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                    assertTrue(msg, gradOK);

                    TestUtils.testModelSerialization(net);
                }
            }
        }
    }
 
Example 10
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientCNNL1L2MLN() {
        if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format...
            return;

        //Parameterized test, testing combinations of:
        // (a) activation function
        // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
        // (c) Loss function (with specified output activations)

        DataSet ds = new IrisDataSetIterator(150, 150).next();
        ds.normalizeZeroMeanZeroUnitVariance();
        INDArray input = ds.getFeatures();
        INDArray labels = ds.getLabels();

        //use l2vals[i] with l1vals[i]
        double[] l2vals = {0.4, 0.0, 0.4, 0.4};
        double[] l1vals = {0.0, 0.0, 0.5, 0.0};
        double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
        double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
        Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS};
        boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first

        LossFunctions.LossFunction[] lossFunctions =
                {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here

        for( int i=0; i<l2vals.length; i++ ){
            Activation afn = activFns[i];
            boolean doLearningFirst = characteristic[i];
            LossFunctions.LossFunction lf = lossFunctions[i];
            Activation outputActivation = outputActivations[i];
            double l2 = l2vals[i];
            double l1 = l1vals[i];

            MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                    .dataType(DataType.DOUBLE)
                    .l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i])
                    .optimizationAlgo(
                            OptimizationAlgorithm.CONJUGATE_GRADIENT)
                    .seed(12345L).list()
                    .layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6)
                            .weightInit(WeightInit.XAVIER).activation(afn)
                            .updater(new NoOp()).build())
                    .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
                            .weightInit(WeightInit.XAVIER).updater(new NoOp()).build())

                    .setInputType(InputType.convolutionalFlat(1, 4, 1));

            MultiLayerConfiguration conf = builder.build();

            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
            mln.init();
            String testName = new Object() {
            }.getClass().getEnclosingMethod().getName();

            if (doLearningFirst) {
                //Run a number of iterations of learning
                mln.setInput(ds.getFeatures());
                mln.setLabels(ds.getLabels());
                mln.computeGradientAndScore();
                double scoreBefore = mln.score();
                for (int j = 0; j < 10; j++)
                    mln.fit(ds);
                mln.computeGradientAndScore();
                double scoreAfter = mln.score();
                //Can't test in 'characteristic mode of operation' if not learning
                String msg = testName
                        + "- score did not (sufficiently) decrease during learning - activationFn="
                        + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                        + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
                        + ", scoreAfter=" + scoreAfter + ")";
                assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
            }

            if (PRINT_RESULTS) {
                System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf
                        + ", outputActivation=" + outputActivation + ", doLearningFirst="
                        + doLearningFirst);
//                for (int j = 0; j < mln.getnLayers(); j++)
//                    System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
            }

            boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

            assertTrue(gradOK);
            TestUtils.testModelSerialization(mln);
        }
    }
 
Example 11
Source File: CNN1DGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testCnn1DWithSubsampling1D() {
        Nd4j.getRandom().setSeed(12345);

        int[] minibatchSizes = {1, 3};
        int length = 7;
        int convNIn = 2;
        int convNOut1 = 3;
        int convNOut2 = 4;
        int finalNOut = 4;

        int[] kernels = {1, 2, 4};
        int stride = 1;
        int padding = 0;
        int pnorm = 2;

        Activation[] activations = {Activation.SIGMOID, Activation.TANH};
        SubsamplingLayer.PoolingType[] poolingTypes =
                new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX,
                        SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM};

        for (Activation afn : activations) {
            for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
                for (int minibatchSize : minibatchSizes) {
                    for (int kernel : kernels) {
                        INDArray input = Nd4j.rand(new int[]{minibatchSize, convNIn, length});
                        INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length);
                        for (int i = 0; i < minibatchSize; i++) {
                            for (int j = 0; j < length; j++) {
                                labels.putScalar(new int[]{i, i % finalNOut, j}, 1.0);
                            }
                        }

                        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                                .dataType(DataType.DOUBLE)
                                .updater(new NoOp())
                                .dist(new NormalDistribution(0, 1)).convolutionMode(ConvolutionMode.Same).list()
                                .layer(0, new Convolution1DLayer.Builder().activation(afn).kernelSize(kernel)
                                        .stride(stride).padding(padding).nIn(convNIn).nOut(convNOut1)
                                        .build())
                                .layer(1, new Convolution1DLayer.Builder().activation(afn).kernelSize(kernel)
                                        .stride(stride).padding(padding).nIn(convNOut1).nOut(convNOut2)
                                        .build())
                                .layer(2, new Subsampling1DLayer.Builder(poolingType).kernelSize(kernel)
                                        .stride(stride).padding(padding).pnorm(pnorm).build())
                                .layer(3, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                        .activation(Activation.SOFTMAX).nOut(finalNOut).build())
                                .setInputType(InputType.recurrent(convNIn, length)).build();

                        String json = conf.toJson();
                        MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json);
                        assertEquals(conf, c2);

                        MultiLayerNetwork net = new MultiLayerNetwork(conf);
                        net.init();

                        String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn="
                                + afn + ", kernel = " + kernel;

                        if (PRINT_RESULTS) {
                            System.out.println(msg);
//                            for (int j = 0; j < net.getnLayers(); j++)
//                                System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams());
                        }

                        boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                        assertTrue(msg, gradOK);
                        TestUtils.testModelSerialization(net);
                    }
                }
            }
        }
    }
 
Example 12
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testCnnMultiLayer() {
        int nOut = 2;

        int[] minibatchSizes = {1, 2, 5};
        int width = 5;
        int height = 5;
        int[] inputDepths = {1, 2, 4};

        Activation[] activations = {Activation.SIGMOID, Activation.TANH};
        SubsamplingLayer.PoolingType[] poolingTypes = new SubsamplingLayer.PoolingType[]{
                SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG};

        Nd4j.getRandom().setSeed(12345);

        for (int inputDepth : inputDepths) {
            for (Activation afn : activations) {
                for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
                    for (int minibatchSize : minibatchSizes) {
                        INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
                        INDArray labels = Nd4j.zeros(minibatchSize, nOut);
                        for (int i = 0; i < minibatchSize; i++) {
                            labels.putScalar(new int[]{i, i % nOut}, 1.0);
                        }

                        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new NoOp())
                                .dataType(DataType.DOUBLE)
                                .activation(afn)
                                .list()
                                .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1)
                                        .cudnnAllowFallback(false)
                                        .padding(0, 0).nIn(inputDepth).nOut(2).build())//output: (5-2+0)/1+1 = 4
                                .layer(1, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(2, 2)
                                        .cudnnAllowFallback(false)
                                        .stride(1, 1).padding(0, 0).build()) //(4-2+0)/1+1 = 3
                                .layer(2, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(2, 2)
                                        .cudnnAllowFallback(false)
                                        .stride(1, 1).padding(0, 0).build()) //(3-2+0)/1+1 = 2
                                .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                        .activation(Activation.SOFTMAX).nIn(2 * 2 * 2).nOut(nOut)
                                        .build())
                                .setInputType(InputType.convolutionalFlat(height, width, inputDepth)).build();

                        assertEquals(ConvolutionMode.Truncate,
                                ((ConvolutionLayer) conf.getConf(0).getLayer()).getConvolutionMode());

                        MultiLayerNetwork net = new MultiLayerNetwork(conf);
                        net.init();

//                        for (int i = 0; i < 4; i++) {
//                            System.out.println("nParams, layer " + i + ": " + net.getLayer(i).numParams());
//                        }

                        String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn="
                                + afn;
                        System.out.println(msg);

                        boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                        assertTrue(msg, gradOK);

                        TestUtils.testModelSerialization(net);
                    }
                }
            }
        }
    }
 
Example 13
Source File: LSTMGradientCheckTests.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientGravesBidirectionalLSTMFull() {
        Activation[] activFns = {Activation.TANH, Activation.SOFTSIGN};

        LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here

        int timeSeriesLength = 3;
        int nIn = 2;
        int layerSize = 2;
        int nOut = 2;
        int miniBatchSize = 3;

        Random r = new Random(12345L);
        INDArray input = Nd4j.rand(DataType.DOUBLE, miniBatchSize, nIn, timeSeriesLength).subi(0.5);

        INDArray labels = TestUtils.randomOneHotTimeSeries(miniBatchSize, nOut, timeSeriesLength);

        //use l2vals[i] with l1vals[i]
        double[] l2vals = {0.4, 0.0};
        double[] l1vals = {0.5, 0.0};
        double[] biasL2 = {0.0, 0.2};
        double[] biasL1 = {0.0, 0.6};

        for (int i = 0; i < lossFunctions.length; i++) {
            for (int k = 0; k < l2vals.length; k++) {
                Activation afn = activFns[i];
                LossFunction lf = lossFunctions[i];
                Activation outputActivation = outputActivations[i];
                double l2 = l2vals[k];
                double l1 = l1vals[k];

                NeuralNetConfiguration.Builder conf =
                        new NeuralNetConfiguration.Builder();
                if (l1 > 0.0)
                    conf.l1(l1);
                if (l2 > 0.0)
                    conf.l2(l2);
                if (biasL2[k] > 0)
                    conf.l2Bias(biasL2[k]);
                if (biasL1[k] > 0)
                    conf.l1Bias(biasL1[k]);

                MultiLayerConfiguration mlc = conf.seed(12345L)
                        .dataType(DataType.DOUBLE)
                        .updater(new NoOp())
                        .list().layer(0,
                                new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize)
                                        .weightInit(new NormalDistribution(0, 1))
                                        .activation(afn)
                                        .build())
                        .layer(1, new RnnOutputLayer.Builder(lf).activation(outputActivation).nIn(layerSize)
                                .nOut(nOut)
                                .dist(new NormalDistribution(0, 1)).updater(new NoOp()).build())
                        .build();


                MultiLayerNetwork mln = new MultiLayerNetwork(mlc);

                mln.init();

                if (PRINT_RESULTS) {
                    System.out.println("testGradientGravesBidirectionalLSTMFull() - activationFn=" + afn
                            + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2
                            + ", l1=" + l1);
//                    for (int j = 0; j < mln.getnLayers(); j++)
//                        System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                }

                boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                String msg = "testGradientGravesLSTMFull() - activationFn=" + afn + ", lossFn=" + lf
                        + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1;
                assertTrue(msg, gradOK);
                TestUtils.testModelSerialization(mln);
            }
        }
    }
 
Example 14
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testGradientCNNL1L2MLN() {
        //Parameterized test, testing combinations of:
        // (a) activation function
        // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
        // (c) Loss function (with specified output activations)

        DataSet ds = new IrisDataSetIterator(150, 150).next();
        ds.normalizeZeroMeanZeroUnitVariance();
        INDArray input = ds.getFeatures();
        INDArray labels = ds.getLabels();

        //use l2vals[i] with l1vals[i]
        double[] l2vals = {0.4, 0.0, 0.4, 0.4};
        double[] l1vals = {0.0, 0.0, 0.5, 0.0};
        double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
        double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
        Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS};
        boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first

        LossFunctions.LossFunction[] lossFunctions =
                {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here

        for( int i=0; i<l2vals.length; i++ ){
            Activation afn = activFns[i];
            boolean doLearningFirst = characteristic[i];
            LossFunctions.LossFunction lf = lossFunctions[i];
            Activation outputActivation = outputActivations[i];
            double l2 = l2vals[i];
            double l1 = l1vals[i];

            MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                    .dataType(DataType.DOUBLE)
                    .l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i])
                    .optimizationAlgo(
                            OptimizationAlgorithm.CONJUGATE_GRADIENT)
                    .seed(12345L).list()
                    .layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6)
                            .weightInit(WeightInit.XAVIER).activation(afn)
                            .updater(new NoOp()).build())
                    .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
                            .weightInit(WeightInit.XAVIER).updater(new NoOp()).build())

                    .setInputType(InputType.convolutionalFlat(1, 4, 1));

            MultiLayerConfiguration conf = builder.build();

            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
            mln.init();
            String testName = new Object() {
            }.getClass().getEnclosingMethod().getName();

            if (doLearningFirst) {
                //Run a number of iterations of learning
                mln.setInput(ds.getFeatures());
                mln.setLabels(ds.getLabels());
                mln.computeGradientAndScore();
                double scoreBefore = mln.score();
                for (int j = 0; j < 10; j++)
                    mln.fit(ds);
                mln.computeGradientAndScore();
                double scoreAfter = mln.score();
                //Can't test in 'characteristic mode of operation' if not learning
                String msg = testName
                        + "- score did not (sufficiently) decrease during learning - activationFn="
                        + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                        + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
                        + ", scoreAfter=" + scoreAfter + ")";
                assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
            }

            if (PRINT_RESULTS) {
                System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf
                        + ", outputActivation=" + outputActivation + ", doLearningFirst="
                        + doLearningFirst);
//                for (int j = 0; j < mln.getnLayers(); j++)
//                    System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
            }

            boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

            assertTrue(gradOK);
            TestUtils.testModelSerialization(mln);
        }
    }
 
Example 15
Source File: MultiLayerNeuralNetConfigurationTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testInvalidOutputLayer(){
    /*
    Test case (invalid configs)
    1. nOut=1 + softmax
    2. mcxent + tanh
    3. xent + softmax
    4. xent + relu
    5. mcxent + sigmoid
     */

    LossFunctions.LossFunction[] lf = new LossFunctions.LossFunction[]{
            LossFunctions.LossFunction.MCXENT, LossFunctions.LossFunction.MCXENT, LossFunctions.LossFunction.XENT,
            LossFunctions.LossFunction.XENT, LossFunctions.LossFunction.MCXENT};
    int[] nOut = new int[]{1, 3, 3, 3, 3};
    Activation[] activations = new Activation[]{Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.RELU, Activation.SIGMOID};
    for( int i=0; i<lf.length; i++ ){
        for(boolean lossLayer : new boolean[]{false, true}) {
            for (boolean validate : new boolean[]{true, false}) {
                String s = "nOut=" + nOut[i] + ",lossFn=" + lf[i] + ",lossLayer=" + lossLayer + ",validate=" + validate;
                if(nOut[i] == 1 && lossLayer)
                    continue;   //nOut is not available in a loss layer, so we can't expect it to detect this case
                try {
                    new NeuralNetConfiguration.Builder()
                            .list()
                            .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
                            .layer(!lossLayer ? new OutputLayer.Builder().nIn(10).nOut(nOut[i]).activation(activations[i]).lossFunction(lf[i]).build()
                                            : new LossLayer.Builder().activation(activations[i]).lossFunction(lf[i]).build())
                            .validateOutputLayerConfig(validate)
                            .build();
                    if (validate) {
                        fail("Expected exception: " + s);
                    }
                } catch (DL4JInvalidConfigException e) {
                    if (validate) {
                        assertTrue(s, e.getMessage().toLowerCase().contains("invalid output"));
                    } else {
                        fail("Validation should not be enabled");
                    }
                }
            }
        }
    }
}
 
Example 16
Source File: CNNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
    public void testDeconvolution2D() {
        int nOut = 2;

        int[] minibatchSizes = new int[]{1, 3, 3, 1, 3};
        int[] kernelSizes = new int[]{1, 1, 1, 3, 3};
        int[] strides = {1, 1, 2, 2, 2};
        int[] dilation = {1, 2, 1, 2, 2};
        Activation[] activations = new Activation[]{Activation.SIGMOID, Activation.TANH, Activation.SIGMOID, Activation.SIGMOID, Activation.SIGMOID};
        ConvolutionMode[] cModes = new ConvolutionMode[]{Same, Same, Truncate, Truncate, Truncate};
        int width = 7;
        int height = 7;
        int inputDepth = 3;

        Nd4j.getRandom().setSeed(12345);

        for (int i = 0; i < minibatchSizes.length; i++) {
            int minibatchSize = minibatchSizes[i];
            int k = kernelSizes[i];
            int s = strides[i];
            int d = dilation[i];
            ConvolutionMode cm = cModes[i];
            Activation act = activations[i];


            int w = d * width;
            int h = d * height;

            INDArray input = Nd4j.rand(minibatchSize, w * h * inputDepth);
            INDArray labels = Nd4j.zeros(minibatchSize, nOut);
            for (int j = 0; j < minibatchSize; j++) {
                labels.putScalar(new int[]{j, j % nOut}, 1.0);
            }

            NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder().seed(12345)
                    .dataType(DataType.DOUBLE)
                    .updater(new NoOp())
                    .activation(act)
                    .list()
                    .layer(new Deconvolution2D.Builder().name("deconvolution_2D_layer")
                            .kernelSize(k, k)
                            .stride(s, s)
                            .dilation(d, d)
                            .convolutionMode(cm)
                            .nIn(inputDepth).nOut(nOut).build());

            MultiLayerConfiguration conf = b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX).nOut(nOut).build())
                    .setInputType(InputType.convolutionalFlat(h, w, inputDepth)).build();

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

//            for (int j = 0; j < net.getLayers().length; j++) {
//                System.out.println("nParams, layer " + j + ": " + net.getLayer(j).numParams());
//            }

            String msg = " - mb=" + minibatchSize + ", k="
                    + k + ", s=" + s + ", d=" + d + ", cm=" + cm;
            System.out.println(msg);

            boolean gradOK = GradientCheckUtil.checkGradients(
                    new GradientCheckUtil.MLNConfig().net(net)
                            .input(input).labels(labels)
                            .subset(true).maxPerParam(100));

            assertTrue(msg, gradOK);

            TestUtils.testModelSerialization(net);
        }
    }
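
The assertion above depends on GradientCheckUtil comparing backprop gradients against numeric estimates. Below is a minimal, self-contained sketch of the central-difference idea; the toy loss and the tolerance handling are assumptions for illustration, not the utility's actual implementation.

// Sketch only: central-difference gradient estimate vs. the analytic gradient.
double eps = 1e-6;
java.util.function.DoubleUnaryOperator loss = x -> (x - 2.0) * (x - 2.0);    // toy scalar "loss"
double w = 0.5;                                                              // a single parameter
double numericGrad = (loss.applyAsDouble(w + eps) - loss.applyAsDouble(w - eps)) / (2 * eps);
double analyticGrad = 2 * (w - 2.0);                                         // exact d/dw of (w - 2)^2
double relError = Math.abs(numericGrad - analyticGrad)
        / (Math.abs(numericGrad) + Math.abs(analyticGrad));
// A gradient check passes when relError stays below the configured tolerance for every parameter.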
 
Example 17
Source File: TestSameDiffDense.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
public void gradientCheck() {
    int nIn = 4;
    int nOut = 4;

    for (boolean workspaces : new boolean[]{true, false}) {
        for (Activation a : new Activation[]{Activation.TANH, Activation.IDENTITY}) {

            String msg = "workspaces: " + workspaces + ", " + a;
            Nd4j.getRandom().setSeed(12345);

            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .dataType(DataType.DOUBLE)
                    .seed(12345)
                    .updater(new NoOp())
                    .trainingWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE)
                    .inferenceWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE)
                    .list()
                    .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut).activation(a).build())
                    .layer(new SameDiffDense.Builder().nIn(nOut).nOut(nOut).activation(a).build())
                    .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX)
                            .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                    //.setInputType(InputType.feedForward(nIn))     //TODO
                    .build();

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            INDArray f = Nd4j.rand(3, nIn);
            INDArray l = TestUtils.randomOneHot(3, nOut);

            log.info("Starting: " + msg);
            boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, f, l);

            assertTrue(msg, gradOK);

            TestUtils.testModelSerialization(net);

            //Sanity check on different minibatch sizes:
            INDArray newIn = Nd4j.vstack(f, f);
            net.output(newIn);
        }
    }
}
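
For reference, a minimal sketch of the forward pass the SameDiffDense layer is expected to reproduce (an assumption based on standard dense-layer semantics, out = act(in * W + b); the Nd4j calls are the same ones used elsewhere on this page).

// Sketch only: standard dense semantics the SameDiff layer should match.
INDArray in = Nd4j.rand(3, 4);                                         // minibatch of 3, nIn = 4
INDArray W  = Nd4j.rand(4, 4);                                         // nIn x nOut weights
INDArray b  = Nd4j.rand(1, 4);                                         // bias row vector
INDArray outTanh     = Transforms.tanh(in.mmul(W).addRowVector(b));    // Activation.TANH case
INDArray outIdentity = in.mmul(W).addRowVector(b);                     // Activation.IDENTITY case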
 
Example 18
Source File: GradientCheckTests.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
    public void elementWiseMultiplicationLayerTest(){

        for(Activation a : new Activation[]{Activation.IDENTITY, Activation.TANH}) {

            ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                    .dataType(DataType.DOUBLE)
                    .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp())
                    .seed(12345L)
                    .weightInit(new UniformDistribution(0, 1))
                    .graphBuilder()
                    .addInputs("features")
                    .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(4)
                            .activation(Activation.TANH)
                            .build(), "features")
                    .addLayer("elementWiseMul", new ElementWiseMultiplicationLayer.Builder().nIn(4).nOut(4)
                            .activation(a)
                            .build(), "dense")
                    .addLayer("loss", new LossLayer.Builder(LossFunctions.LossFunction.COSINE_PROXIMITY)
                            .activation(Activation.IDENTITY).build(), "elementWiseMul")
                    .setOutputs("loss")
                    .build();

            ComputationGraph netGraph = new ComputationGraph(conf);
            netGraph.init();

            log.info("params before learning: " + netGraph.getLayer(1).paramTable());

            //Run a number of iterations of learning; manually make some pseudo data.
            //The idea is simple: since we use an element-wise multiplication layer (just a per-element scaling), the cosine
            // similarity is mainly decided by the fourth value; if everything runs well, we will get a large weight for the fourth element

            INDArray features = Nd4j.create(new double[][]{{1, 2, 3, 4}, {1, 2, 3, 1}, {1, 2, 3, 0}});
            INDArray labels = Nd4j.create(new double[][]{{1, 1, 1, 8}, {1, 1, 1, 2}, {1, 1, 1, 1}});

            netGraph.setInputs(features);
            netGraph.setLabels(labels);
            netGraph.computeGradientAndScore();
            double scoreBefore = netGraph.score();

            String msg;
            for (int epoch = 0; epoch < 5; epoch++)
                netGraph.fit(new INDArray[]{features}, new INDArray[]{labels});
            netGraph.computeGradientAndScore();
            double scoreAfter = netGraph.score();
            //Can't test in 'characteristic mode of operation' if not learning
            msg = "elementWiseMultiplicationLayerTest() - score did not (sufficiently) decrease during learning - activationFn="
                    + "Id" + ", lossFn=" + "Cos-sim" + ", outputActivation=" + "Id"
                    + ", doLearningFirst=" + "true" + " (before=" + scoreBefore
                    + ", scoreAfter=" + scoreAfter + ")";
            assertTrue(msg, scoreAfter < 0.8 * scoreBefore);

//        expectation in the linear-regression case (with only an element-wise multiplication layer): large weight for the fourth element
            log.info("params after learning: " + netGraph.getLayer(1).paramTable());

            boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(netGraph).inputs(new INDArray[]{features})
                    .labels(new INDArray[]{labels}));

            msg = "elementWiseMultiplicationLayerTest() - activationFn=" + "ID" + ", lossFn=" + "Cos-sim"
                    + ", outputActivation=" + "Id" + ", doLearningFirst=" + "true";
            assertTrue(msg, gradOK);

            TestUtils.testModelSerialization(netGraph);
        }
    }
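
A minimal sketch of what the element-wise multiplication layer computes, which is what the cosine-similarity reasoning in the comments above relies on (assumption: per-element scaling, out_i = act(w_i * in_i + b_i), rather than a matrix multiply; the weight values are hypothetical).

// Sketch only: element-wise scaling, not a matrix multiply (bias and activation omitted).
INDArray in = Nd4j.create(new double[]{1, 2, 3, 4});
INDArray w  = Nd4j.create(new double[]{0.1, 0.1, 0.1, 2.0});   // hypothetical learned weights
INDArray out = in.mul(w);                                       // element-wise product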
 
Example 19
Source File: GradientCheckTests.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
    public void testAutoEncoder() {
        //As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied
        //Need to run gradient through updater, so that L2 can be applied

        Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
        boolean[] characteristic = {false, true}; //If true: run some backprop steps first

        LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH};

        DataNormalization scaler = new NormalizerMinMaxScaler();
        DataSetIterator iter = new IrisDataSetIterator(150, 150);
        scaler.fit(iter);
        iter.setPreProcessor(scaler);
        DataSet ds = iter.next();
        INDArray input = ds.getFeatures();
        INDArray labels = ds.getLabels();

        NormalizerStandardize norm = new NormalizerStandardize();
        norm.fit(ds);
        norm.transform(ds);

        double[] l2vals = {0.2, 0.0, 0.2};
        double[] l1vals = {0.0, 0.3, 0.3}; //i.e., use l2vals[i] with l1vals[i]

        for (Activation afn : activFns) {
            for (boolean doLearningFirst : characteristic) {
                for (int i = 0; i < lossFunctions.length; i++) {
                    for (int k = 0; k < l2vals.length; k++) {
                        LossFunction lf = lossFunctions[i];
                        Activation outputActivation = outputActivations[i];
                        double l2 = l2vals[k];
                        double l1 = l1vals[k];

                        Nd4j.getRandom().setSeed(12345);
                        MultiLayerConfiguration conf =
                                        new NeuralNetConfiguration.Builder()
                                                        .dataType(DataType.DOUBLE)
                                                        .updater(new NoOp())
                                                        .l2(l2).l1(l1)
                                                        .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                                                        .seed(12345L)
                                                        .dist(new NormalDistribution(0, 1))
                                                        .list().layer(0,
                                                                        new AutoEncoder.Builder().nIn(4).nOut(3)
                                                                                        .activation(afn).build())
                                                        .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3)
                                                                        .activation(outputActivation).build())
                                                        .build();

                        MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                        mln.init();

                        String msg;
                        if (doLearningFirst) {
                            //Run a number of iterations of learning
                            mln.setInput(ds.getFeatures());
                            mln.setLabels(ds.getLabels());
                            mln.computeGradientAndScore();
                            double scoreBefore = mln.score();
                            for (int j = 0; j < 10; j++)
                                mln.fit(ds);
                            mln.computeGradientAndScore();
                            double scoreAfter = mln.score();
                            //Can't test in 'characteristic mode of operation' if not learning
                            msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn="
                                            + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                            + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1
                                            + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
                            assertTrue(msg, scoreAfter < scoreBefore);
                        }

                        msg = "testGradMLP2LayerIrisSimple() - activationFn=" + afn + ", lossFn=" + lf
                                        + ", outputActivation=" + outputActivation + ", doLearningFirst="
                                        + doLearningFirst + ", l2=" + l2 + ", l1=" + l1;
                        if (PRINT_RESULTS) {
                            System.out.println(msg);
//                            for (int j = 0; j < mln.getnLayers(); j++)
//                                System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                        }

                        boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                        assertTrue(msg, gradOK);
                        TestUtils.testModelSerialization(mln);
                    }
                }
            }
        }
    }
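
As a rough guide to how the l1/l2 coefficients above enter the score that the gradient check differentiates, here is a hedged sketch using the conventional penalty terms; whether DL4J includes the 1/2 factor on the L2 term is not asserted here.

// Sketch only: score = dataLoss + l1 * sum_i |w_i| + (l2 / 2) * sum_i w_i^2   (conventional form, assumed).
double l1 = 0.3, l2 = 0.2;
INDArray w = Nd4j.rand(1, 10);
double norm1 = w.norm1Number().doubleValue();    // sum of absolute values
double norm2 = w.norm2Number().doubleValue();    // Euclidean norm
double dataLoss = 1.234;                         // placeholder for the MCXENT/MSE term
double score = dataLoss + l1 * norm1 + 0.5 * l2 * norm2 * norm2;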
 
Example 20
Source File: SameDiffTests.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
    public void testActivationBackprop() {

        Activation[] afns = new Activation[]{
                Activation.TANH,
                Activation.SIGMOID,
                Activation.ELU,
                Activation.SOFTPLUS,
                Activation.SOFTSIGN,
                Activation.HARDTANH,
                Activation.CUBE,
                //WRONG output - see issue https://github.com/deeplearning4j/nd4j/issues/2426
                Activation.RELU,            //JVM crash
                Activation.LEAKYRELU        //JVM crash
        };

        for (Activation a : afns) {

            SameDiff sd = SameDiff.create();
            INDArray inArr = Nd4j.linspace(-3, 3, 7);
            INDArray labelArr = Nd4j.linspace(-3, 3, 7).muli(0.5);
            SDVariable in = sd.var("in", inArr.dup());

//            System.out.println("inArr: " + inArr);

            INDArray outExp;
            SDVariable out;
            switch (a) {
                case ELU:
                    out = sd.nn().elu("out", in);
                    outExp = Transforms.elu(inArr, true);
                    break;
                case HARDTANH:
                    out = sd.nn().hardTanh("out", in);
                    outExp = Transforms.hardTanh(inArr, true);
                    break;
                case LEAKYRELU:
                    out = sd.nn().leakyRelu("out", in, 0.01);
                    outExp = Transforms.leakyRelu(inArr, true);
                    break;
                case RELU:
                    out = sd.nn().relu("out", in, 0.0);
                    outExp = Transforms.relu(inArr, true);
                    break;
                case SIGMOID:
                    out = sd.nn().sigmoid("out", in);
                    outExp = Transforms.sigmoid(inArr, true);
                    break;
                case SOFTPLUS:
                    out = sd.nn().softplus("out", in);
                    outExp = Transforms.softPlus(inArr, true);
                    break;
                case SOFTSIGN:
                    out = sd.nn().softsign("out", in);
                    outExp = Transforms.softsign(inArr, true);
                    break;
                case TANH:
                    out = sd.math().tanh("out", in);
                    outExp = Transforms.tanh(inArr, true);
                    break;
                case CUBE:
                    out = sd.math().cube("out", in);
                    outExp = Transforms.pow(inArr, 3, true);
                    break;
                default:
                    throw new RuntimeException(a.toString());
            }

            //Sum squared error loss:
            SDVariable label = sd.var("label", labelArr.dup());
            SDVariable diff = label.sub("diff", out);
            SDVariable sqDiff = diff.mul("sqDiff", diff);
            SDVariable totSum = sd.sum("totSum", sqDiff, Integer.MAX_VALUE);    //Loss function...

            Map<String,INDArray> m = sd.output(Collections.emptyMap(), "out");
            INDArray outAct = m.get("out");
            assertEquals(a.toString(), outExp, outAct);

            // L = sum_i (label - out)^2
            //dL/dOut = 2(out - label)
            INDArray dLdOutExp = outExp.sub(labelArr).mul(2);
            INDArray dLdInExp = a.getActivationFunction().backprop(inArr.dup(), dLdOutExp.dup()).getFirst();

            Map<String,INDArray> grads = sd.calculateGradients(null, "out", "in");
//            sd.execBackwards(Collections.emptyMap());
//            SameDiff gradFn = sd.getFunction("grad");
            INDArray dLdOutAct = grads.get("out");
            INDArray dLdInAct = grads.get("in");

            assertEquals(a.toString(), dLdOutExp, dLdOutAct);
            assertEquals(a.toString(), dLdInExp, dLdInAct);
        }
    }
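
Taking the TANH case as a concrete instance, this is a minimal sketch of the expected values the assertions above verify, assuming out = tanh(in) and the sum-of-squared-errors loss built in the test, so dL/dOut = 2 * (out - label) and dL/dIn = dL/dOut * (1 - tanh(in)^2).

// Sketch only: analytic tanh backprop for the loss used above.
INDArray in = Nd4j.linspace(-3, 3, 7);
INDArray label = in.mul(0.5);
INDArray out = Transforms.tanh(in, true);                // forward: out = tanh(in)
INDArray dLdOut = out.sub(label).mul(2);                 // dL/dOut = 2 * (out - label)
INDArray dLdIn = dLdOut.mul(out.mul(out).rsub(1.0));     // chain rule: dOut/dIn = 1 - tanh(in)^2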