Java Code Examples for org.deeplearning4j.nn.conf.NeuralNetConfiguration#addVariable()

The following examples show how to use org.deeplearning4j.nn.conf.NeuralNetConfiguration#addVariable(). The source file, originating project, and license are noted above each example.
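All of the initializers below follow the same pattern: init() slices per-parameter views out of the flat paramsView array, puts each view into the returned map under a key such as "W" or "b", and calls conf.addVariable(key) so the configuration records which variables the layer owns. The sketch below shows that interaction from the caller's side. It is illustrative only: the DenseLayer sizes and builder options are arbitrary, and the printed keys assume the stock DefaultParamInitializer behavior.

import java.util.Map;

import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class AddVariableSketch {
    public static void main(String[] args) {
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .layer(new DenseLayer.Builder().nIn(4).nOut(3).build())
                .build();

        ParamInitializer initializer = conf.getLayer().initializer();  // DefaultParamInitializer for a DenseLayer
        long numParams = initializer.numParams(conf);                  // nIn*nOut weights + nOut biases = 15
        INDArray paramsView = Nd4j.create(1, (int) numParams);         // flat row vector backing all parameters

        // init() slices paramsView into per-parameter views and registers each key
        // on the configuration via conf.addVariable(...)
        Map<String, INDArray> params = initializer.init(conf, paramsView, true);

        System.out.println(params.keySet());   // expected: [W, b]
        System.out.println(conf.variables());  // keys recorded by addVariable(): expected [W, b]
    }
}
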
Example 1
Source File: PReLUParamInitializer.java    From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    if (!(conf.getLayer() instanceof BaseLayer))
        throw new IllegalArgumentException("unsupported layer type: " + conf.getLayer().getClass().getName());

    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    val length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                        "Expected params view of length " + length + ", got length " + paramsView.length());

    INDArray weightView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, length));

    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    conf.addVariable(WEIGHT_KEY);

    return params;
}
 
Example 2
Source File: DepthwiseConvolutionParamInitializer.java    From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    DepthwiseConvolution2D layer = (DepthwiseConvolution2D) conf.getLayer();
    if (layer.getKernelSize().length != 2) throw new IllegalArgumentException("Filter size must be == 2");

    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    DepthwiseConvolution2D layerConf = (DepthwiseConvolution2D) conf.getLayer();

    val depthWiseParams = numDepthWiseParams(layerConf);
    val biasParams = numBiasParams(layerConf);

    INDArray depthWiseWeightView = paramsView.get(
            NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(biasParams, biasParams + depthWiseParams));

    params.put(WEIGHT_KEY, createDepthWiseWeightMatrix(conf, depthWiseWeightView, initializeParams));
    conf.addVariable(WEIGHT_KEY);

    if(layer.hasBias()){
        INDArray biasView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, biasParams));
        params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
        conf.addVariable(BIAS_KEY);
    }

    return params;
}
 
Example 3
Source File: PretrainParamInitializer.java    From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = super.init(conf, paramsView, initializeParams);

    org.deeplearning4j.nn.conf.layers.BasePretrainNetwork layerConf =
                    (org.deeplearning4j.nn.conf.layers.BasePretrainNetwork) conf.getLayer();
    val nIn = layerConf.getNIn();
    val nOut = layerConf.getNOut();
    val nWeightParams = nIn * nOut;

    INDArray visibleBiasView = paramsView.get(NDArrayIndex.interval(0,0,true),
                    NDArrayIndex.interval(nWeightParams + nOut, nWeightParams + nOut + nIn));
    params.put(VISIBLE_BIAS_KEY, createVisibleBias(conf, visibleBiasView, initializeParams));
    conf.addVariable(VISIBLE_BIAS_KEY);

    return params;
}
 
Example 4
Source File: DeepFMParameter.java    From jstarcraft-rns with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration configuration, INDArray view, boolean initialize) {
    Map<String, INDArray> parameters = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    FeedForwardLayer layerConfiguration = (FeedForwardLayer) configuration.getLayer();
    long numberOfOut = layerConfiguration.getNOut();
    long numberOfWeights = numberOfFeatures * numberOfOut;
    INDArray weight = view.get(new INDArrayIndex[] { NDArrayIndex.point(0), NDArrayIndex.interval(0, numberOfWeights) });
    INDArray bias = view.get(NDArrayIndex.point(0), NDArrayIndex.interval(numberOfWeights, numberOfWeights + numberOfOut));

    parameters.put(WEIGHT_KEY, this.createWeightMatrix(configuration, weight, initialize));
    parameters.put(BIAS_KEY, createBias(configuration, bias, initialize));
    configuration.addVariable(WEIGHT_KEY);
    configuration.addVariable(BIAS_KEY);
    return parameters;
}
 
Example 5
Source File: TestOptimizers.java    From deeplearning4j with Apache License 2.0
private static void testRastriginFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
                int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];

    for (int i = 0; i <= nOptIter; i++) {
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                        .maxNumLineSearchIterations(maxNumLineSearchIter).miniBatch(false)
                        .updater(new AdaGrad(1e-2))
                        .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
        conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here

        Model m = new RastriginFunctionModel(10, conf);
        int nParams = (int)m.numParams();
        if (i == 0) {
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[0] = m.score(); //Before optimization
        } else {
            ConvexOptimizer opt = getOptimizer(oa, conf, m);
            opt.getUpdater().setStateViewArray((Layer) m, Nd4j.create(new int[] {1, nParams}, 'c'), true);
            opt.optimize(LayerWorkspaceMgr.noWorkspaces());
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[i] = m.score();
            assertTrue(!Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]));
        }
    }

    if (PRINT_OPT_RESULTS) {
        System.out.println("Rastrigin: Multiple optimization iterations (" + nOptIter
                        + " opt. iter.) score vs iteration, maxNumLineSearchIter=" + maxNumLineSearchIter + ": "
                        + oa);
        System.out.println(Arrays.toString(scores));
    }
    for (int i = 1; i < scores.length; i++) {
        if (i == 1) {
            assertTrue(scores[i] <= scores[i - 1]); //Require at least one step of improvement
        } else {
            assertTrue(scores[i] <= scores[i - 1]);
        }
    }
}
 
Example 6
Source File: TestOptimizers.java    From deeplearning4j with Apache License 2.0
private static void testSphereFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
                int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];

    for (int i = 0; i <= nOptIter; i++) {
        Random rng = new DefaultRandom(12345L);
        org.nd4j.linalg.api.rng.distribution.Distribution dist =
                        new org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(rng, -10, 10);
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                        .maxNumLineSearchIterations(maxNumLineSearchIter).updater(new Sgd(0.1))
                        .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
        conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here

        Model m = new SphereFunctionModel(100, dist, conf);
        if (i == 0) {
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[0] = m.score(); //Before optimization
        } else {
            ConvexOptimizer opt = getOptimizer(oa, conf, m);
            for( int j=0; j<100; j++ ) {
                opt.optimize(LayerWorkspaceMgr.noWorkspaces());
            }
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[i] = m.score();
            assertTrue(!Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]));
        }
    }

    if (PRINT_OPT_RESULTS) {
        System.out.println("Multiple optimization iterations (" + nOptIter
                        + " opt. iter.) score vs iteration, maxNumLineSearchIter=" + maxNumLineSearchIter + ": "
                        + oa);
        System.out.println(Arrays.toString(scores));
    }

    for (int i = 1; i < scores.length; i++) {
        assertTrue(scores[i] <= scores[i - 1]);
    }
    assertTrue(scores[scores.length - 1] < 1.0); //Very easy function, expect score ~= 0 with any reasonable number of steps/numLineSearchIter
}
 
Example 7
Source File: OCNNParamInitializer.java    From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer ocnnOutputLayer = ( org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) conf.getLayer();
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    val nIn = ocnnOutputLayer.getNIn();
    int hiddenLayer = ocnnOutputLayer.getHiddenSize();
    Preconditions.checkState(hiddenLayer > 0, "OCNNOutputLayer hidden layer state: must be non-zero.");

    val firstLayerWeightLength =  hiddenLayer;
    val secondLayerLength = nIn * hiddenLayer;
    int rLength = 1;
    INDArray weightView = paramsView.get(point(0),interval(0, firstLayerWeightLength))
            .reshape(1,hiddenLayer);
    INDArray weightsTwoView = paramsView.get(point(0),
            NDArrayIndex.interval(firstLayerWeightLength,
                    firstLayerWeightLength + secondLayerLength))
            .reshape('f',nIn,hiddenLayer);
    INDArray rView = paramsView.get(point(0),point(paramsView.length() - rLength));


    INDArray paramViewPut = createWeightMatrix(conf, weightView, initializeParams);
    params.put(W_KEY, paramViewPut);
    conf.addVariable(W_KEY);
    INDArray paramIvewPutTwo = createWeightMatrix(conf,weightsTwoView,initializeParams);
    params.put(V_KEY,paramIvewPutTwo);
    conf.addVariable(V_KEY);
    INDArray rViewPut = createWeightMatrix(conf,rView,initializeParams);
    params.put(R_KEY,rViewPut);
    conf.addVariable(R_KEY);

    return params;
}
 
Example 8
Source File: ElementWiseParamInitializer.java    From deeplearning4j with Apache License 2.0
/**
 * Initialize the parameters
 *
 * @param conf             the configuration
 * @param paramsView       a view of the full network (backprop) parameters
 * @param initializeParams if true: initialize the parameters according to the configuration. If false: don't modify the
 *                         values in the paramsView array (but do select out the appropriate subset, reshape etc as required)
 * @return Map of parameters keyed by type (view of the 'paramsView' array)
 */
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    if (!(conf.getLayer() instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer))
        throw new IllegalArgumentException("unsupported layer type: " + conf.getLayer().getClass().getName());

    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    val length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                "Expected params view of length " + length + ", got length " + paramsView.length());

    org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
            (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();
    val nIn = layerConf.getNIn();

    val nWeightParams = nIn;
    INDArray weightView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nWeightParams));
    INDArray biasView = paramsView.get(NDArrayIndex.interval(0,0,true),
            NDArrayIndex.interval(nWeightParams, nWeightParams + nIn));


    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);

    return params;
}
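As the javadoc above notes, the arrays in the returned map are views of the 'paramsView' argument, not copies, so writes through the parameter map are visible in the flat array. A quick hypothetical check of that property (a fragment; it assumes the conf and initializer objects from the sketch after the introduction, and the "W" key assumes DefaultParamInitializer naming):

// Hypothetical fragment: the map entries returned by init() are views of paramsView,
// so an in-place write through the map is visible in the flat array.
INDArray paramsView = Nd4j.create(1, (int) initializer.numParams(conf));
Map<String, INDArray> params = initializer.init(conf, paramsView, true);

params.get("W").addi(1.0);                   // write through the map entry...
System.out.println(paramsView.maxNumber());  // ...and the flat view reflects it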
 
Example 9
Source File: CenterLossParamInitializer.java    From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer layerConf =
                    (org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer) conf.getLayer();

    val nIn = layerConf.getNIn();
    val nOut = layerConf.getNOut(); // also equal to numClasses

    val wEndOffset = nIn * nOut;
    val bEndOffset = wEndOffset + nOut;
    val cEndOffset = bEndOffset + nIn * nOut;

    INDArray weightView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, wEndOffset));
    INDArray biasView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(wEndOffset, bEndOffset));
    INDArray centerLossView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(bEndOffset, cEndOffset))
                    .reshape('c', nOut, nIn);

    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
    params.put(CENTER_KEY, createCenterLossMatrix(conf, centerLossView, initializeParams));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
    conf.addVariable(CENTER_KEY);

    return params;
}
 
Example 10
Source File: SeparableConvolutionParamInitializer.java    From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    SeparableConvolution2D layer = (SeparableConvolution2D) conf.getLayer();
    if (layer.getKernelSize().length != 2) throw new IllegalArgumentException("Filter size must be == 2");

    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    SeparableConvolution2D layerConf = (SeparableConvolution2D) conf.getLayer();

    val depthWiseParams = numDepthWiseParams(layerConf);
    val biasParams = numBiasParams(layerConf);

    INDArray depthWiseWeightView = paramsView.get(
            NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(biasParams, biasParams + depthWiseParams));
    INDArray pointWiseWeightView = paramsView.get(
            NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(biasParams + depthWiseParams, numParams(conf)));

    params.put(DEPTH_WISE_WEIGHT_KEY, createDepthWiseWeightMatrix(conf, depthWiseWeightView, initializeParams));
    conf.addVariable(DEPTH_WISE_WEIGHT_KEY);
    params.put(POINT_WISE_WEIGHT_KEY, createPointWiseWeightMatrix(conf, pointWiseWeightView, initializeParams));
    conf.addVariable(POINT_WISE_WEIGHT_KEY);

    if(layer.hasBias()){
        INDArray biasView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, biasParams));
        params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
        conf.addVariable(BIAS_KEY);
    }

    return params;
}
 
Example 11
Source File: SameDiffParamInitializer.java    From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    AbstractSameDiffLayer sd = (AbstractSameDiffLayer) conf.getLayer();
    Map<String,INDArray> out = subsetAndReshape(sd.getLayerParams().getParameterKeys(),
            sd.getLayerParams().getParamShapes(), paramsView, sd);
    if(initializeParams){
        sd.initializeParameters(out);
    }

    for(String s : sd.getLayerParams().getParameterKeys()){
        conf.addVariable(s);
    }

    return out;
}
 
Example 12
Source File: CDAEParameter.java    From jstarcraft-rns with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = super.init(conf, paramsView, initializeParams);
    FeedForwardLayer layerConf = (FeedForwardLayer) conf.getLayer();
    long nIn = layerConf.getNIn();
    long nOut = layerConf.getNOut();
    long nWeightParams = nIn * nOut;
    long nUserWeightParams = numberOfUsers * nOut;
    INDArray userWeightView = paramsView.get(new INDArrayIndex[] { NDArrayIndex.point(0), NDArrayIndex.interval(nWeightParams + nOut, nWeightParams + nOut + nUserWeightParams) });
    params.put(USER_KEY, this.createUserWeightMatrix(conf, userWeightView, initializeParams));
    conf.addVariable(USER_KEY);
    return params;
}
 
Example 13
Source File: SimpleRnnParamInitializer.java    From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    SimpleRnn c = (SimpleRnn)conf.getLayer();
    val nIn = c.getNIn();
    val nOut = c.getNOut();

    Map<String,INDArray> m;

    if (initializeParams) {
        m = getSubsets(paramsView, nIn, nOut, false, hasLayerNorm(c));
        INDArray w = c.getWeightInitFn().init(nIn, nOut, new long[]{nIn, nOut}, 'f', m.get(WEIGHT_KEY));
        m.put(WEIGHT_KEY, w);

        IWeightInit rwInit;
        if (c.getWeightInitFnRecurrent() != null) {
            rwInit = c.getWeightInitFnRecurrent();
        } else {
            rwInit = c.getWeightInitFn();
        }

        INDArray rw = rwInit.init(nOut, nOut, new long[]{nOut, nOut}, 'f', m.get(RECURRENT_WEIGHT_KEY));
        m.put(RECURRENT_WEIGHT_KEY, rw);

        m.get(BIAS_KEY).assign(c.getBiasInit());

        if(hasLayerNorm(c)){
            m.get(GAIN_KEY).assign(c.getGainInit());
        }
    } else {
        m = getSubsets(paramsView, nIn, nOut, true, hasLayerNorm(c));
    }

    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(RECURRENT_WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
    if(hasLayerNorm(c)){
        conf.addVariable(GAIN_KEY);
    }

    return m;
}
 
Example 14
Source File: LSTMParamInitializer.java    From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.LSTM layerConf = (org.deeplearning4j.nn.conf.layers.LSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();

    val nL = layerConf.getNOut(); //i.e., n neurons in this layer
    val nLast = layerConf.getNIn(); //i.e., n neurons in previous layer

    conf.addVariable(INPUT_WEIGHT_KEY);
    conf.addVariable(RECURRENT_WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);

    val length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                        "Expected params view of length " + length + ", got length " + paramsView.length());

    val nParamsIn = nLast * (4 * nL);
    val nParamsRecurrent = nL * (4 * nL);
    val nBias = 4 * nL;
    INDArray inputWeightView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nParamsIn));
    INDArray recurrentWeightView = paramsView.get(NDArrayIndex.interval(0,0,true),
                    NDArrayIndex.interval(nParamsIn, nParamsIn + nParamsRecurrent));
    INDArray biasView = paramsView.get(NDArrayIndex.interval(0,0,true),
                    NDArrayIndex.interval(nParamsIn + nParamsRecurrent, nParamsIn + nParamsRecurrent + nBias));

    if (initializeParams) {
        val fanIn = nL;
        val fanOut = nLast + nL;
        val inputWShape = new long[] {nLast, 4 * nL};
        val recurrentWShape = new long[] {nL, 4 * nL};

        IWeightInit rwInit;
        if(layerConf.getWeightInitFnRecurrent() != null){
            rwInit = layerConf.getWeightInitFnRecurrent();
        } else {
            rwInit = layerConf.getWeightInitFn();
        }

        params.put(INPUT_WEIGHT_KEY, layerConf.getWeightInitFn().init(fanIn, fanOut, inputWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY, rwInit.init(fanIn, fanOut, recurrentWShape, IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, recurrentWeightView));
        biasView.put(new INDArrayIndex[] {NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nL, 2 * nL)},
                        Nd4j.valueArrayOf(new long[]{1, nL}, forgetGateInit)); //Order: input, forget, output, input modulation, i.e., IFOG
        /*The above line initializes the forget gate biases to specified value.
         * See Sutskever PhD thesis, pg19:
         * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
         *  which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
         *  not done, it will be harder to learn long range dependencies because the smaller values of the forget
         *  gates will create a vanishing gradients problem."
         *  http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
         */
        params.put(BIAS_KEY, biasView);
    } else {
        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.reshapeWeights(new long[] {nLast, 4 * nL}, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY,
                        WeightInitUtil.reshapeWeights(new long[] {nL, 4 * nL}, recurrentWeightView));
        params.put(BIAS_KEY, biasView);
    }

    return params;
}
 
Example 15
Source File: GravesLSTMParamInitializer.java    From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.GravesLSTM layerConf =
                    (org.deeplearning4j.nn.conf.layers.GravesLSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();

    val nL = layerConf.getNOut(); //i.e., n neurons in this layer
    val nLast = layerConf.getNIn(); //i.e., n neurons in previous layer

    conf.addVariable(INPUT_WEIGHT_KEY);
    conf.addVariable(RECURRENT_WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);

    val length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                        "Expected params view of length " + length + ", got length " + paramsView.length());

    val nParamsIn = nLast * (4 * nL);
    val nParamsRecurrent = nL * (4 * nL + 3);
    val nBias = 4 * nL;
    INDArray inputWeightView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nParamsIn));
    INDArray recurrentWeightView = paramsView.get(NDArrayIndex.interval(0,0,true),
                    NDArrayIndex.interval(nParamsIn, nParamsIn + nParamsRecurrent));
    INDArray biasView = paramsView.get(NDArrayIndex.interval(0,0,true),
                    NDArrayIndex.interval(nParamsIn + nParamsRecurrent, nParamsIn + nParamsRecurrent + nBias));

    if (initializeParams) {
        val fanIn = nL;
        val fanOut = nLast + nL;
        val inputWShape = new long[] {nLast, 4 * nL};
        val recurrentWShape = new long[] {nL, 4 * nL + 3};

        IWeightInit rwInit;
        if(layerConf.getWeightInitFnRecurrent() != null){
            rwInit = layerConf.getWeightInitFnRecurrent();
        } else {
            rwInit = layerConf.getWeightInitFn();
        }

        params.put(INPUT_WEIGHT_KEY,layerConf.getWeightInitFn().init(fanIn, fanOut, inputWShape,
                        IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY, rwInit.init(fanIn, fanOut, recurrentWShape,
                        IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, recurrentWeightView));
        biasView.put(new INDArrayIndex[] {NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nL, 2 * nL)},
                        Nd4j.valueArrayOf(new long[]{1, nL}, forgetGateInit)); //Order: input, forget, output, input modulation, i.e., IFOG
        /*The above line initializes the forget gate biases to specified value.
         * See Sutskever PhD thesis, pg19:
         * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
         *  which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
         *  not done, it will be harder to learn long range dependencies because the smaller values of the forget
         *  gates will create a vanishing gradients problem."
         *  http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
         */
        params.put(BIAS_KEY, biasView);
    } else {
        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.reshapeWeights(new long[] {nLast, 4 * nL}, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY,
                        WeightInitUtil.reshapeWeights(new long[] {nL, 4 * nL + 3}, recurrentWeightView));
        params.put(BIAS_KEY, biasView);
    }

    return params;
}
 
Example 16
Source File: DefaultParamInitializer.java    From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    if (!(conf.getLayer() instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer))
        throw new IllegalArgumentException("unsupported layer type: " + conf.getLayer().getClass().getName());

    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    val length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                        "Expected params view of length " + length + ", got length " + paramsView.length());

    org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
                    (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();
    val nIn = layerConf.getNIn();
    val nOut = layerConf.getNOut();

    val nWeightParams = nIn * nOut;
    INDArray weightView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nWeightParams));

    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    conf.addVariable(WEIGHT_KEY);

    long offset = nWeightParams;
    if(hasBias(layerConf)){
        INDArray biasView = paramsView.get(NDArrayIndex.interval(0,0,true),
                NDArrayIndex.interval(offset, offset + nOut));
        params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
        conf.addVariable(BIAS_KEY);
        offset += nOut;
    }

    if(hasLayerNorm(layerConf)){
        INDArray gainView = paramsView.get(NDArrayIndex.interval(0,0,true),
                NDArrayIndex.interval(offset, offset + nOut));
        params.put(GAIN_KEY, createGain(conf, gainView, initializeParams));
        conf.addVariable(GAIN_KEY);
    }

    return params;
}
 
Example 17
Source File: BatchNormalizationParamInitializer.java    From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    // TODO setup for RNN
    BatchNormalization layer = (BatchNormalization) conf.getLayer();
    val nOut = layer.getNOut();

    long meanOffset = 0;
    if (!layer.isLockGammaBeta()) { //No gamma/beta parameters when gamma/beta are locked
        INDArray gammaView = paramView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nOut));
        INDArray betaView = paramView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nOut, 2 * nOut));

        params.put(GAMMA, createGamma(conf, gammaView, initializeParams));
        conf.addVariable(GAMMA);
        params.put(BETA, createBeta(conf, betaView, initializeParams));
        conf.addVariable(BETA);

        meanOffset = 2 * nOut;
    }

    INDArray globalMeanView =
                    paramView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(meanOffset, meanOffset + nOut));
    INDArray globalVarView = paramView.get(NDArrayIndex.interval(0,0,true),
                    NDArrayIndex.interval(meanOffset + nOut, meanOffset + 2 * nOut));

    if (initializeParams) {
        globalMeanView.assign(0);
        if(layer.isUseLogStd()){
            //Global log stdev: assign 0.0 as initial value (s = sqrt(v); with v = 1, log10(s) = log10(1) = 0)
            globalVarView.assign(0);
        } else {
            //Global variance view: assign 1.0 as initial value
            globalVarView.assign(1);
        }
    }

    params.put(GLOBAL_MEAN, globalMeanView);
    conf.addVariable(GLOBAL_MEAN);
    if(layer.isUseLogStd()){
        params.put(GLOBAL_LOG_STD, globalVarView);
        conf.addVariable(GLOBAL_LOG_STD);
    } else {
        params.put(GLOBAL_VAR, globalVarView);
        conf.addVariable(GLOBAL_VAR);
    }

    return params;
}
 
Example 18
Source File: GravesBidirectionalLSTMParamInitializer.java    From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM layerConf =
            (org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();

    val nL = layerConf.getNOut(); //i.e., n neurons in this layer
    val nLast = layerConf.getNIn(); //i.e., n neurons in previous layer

    conf.addVariable(INPUT_WEIGHT_KEY_FORWARDS);
    conf.addVariable(RECURRENT_WEIGHT_KEY_FORWARDS);
    conf.addVariable(BIAS_KEY_FORWARDS);
    conf.addVariable(INPUT_WEIGHT_KEY_BACKWARDS);
    conf.addVariable(RECURRENT_WEIGHT_KEY_BACKWARDS);
    conf.addVariable(BIAS_KEY_BACKWARDS);

    val nParamsInput = nLast * (4 * nL);
    val nParamsRecurrent = nL * (4 * nL + 3);
    val nBias = 4 * nL;

    val rwFOffset = nParamsInput;
    val bFOffset = rwFOffset + nParamsRecurrent;
    val iwROffset = bFOffset + nBias;
    val rwROffset = iwROffset + nParamsInput;
    val bROffset = rwROffset + nParamsRecurrent;

    INDArray iwF = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, rwFOffset));
    INDArray rwF = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(rwFOffset, bFOffset));
    INDArray bF = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(bFOffset, iwROffset));
    INDArray iwR = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(iwROffset, rwROffset));
    INDArray rwR = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(rwROffset, bROffset));
    INDArray bR = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(bROffset, bROffset + nBias));

    if (initializeParams) {
        bF.put(new INDArrayIndex[]{NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nL, 2 * nL)},
                Nd4j.ones(1, nL).muli(forgetGateInit)); //Order: input, forget, output, input modulation, i.e., IFOG
        bR.put(new INDArrayIndex[]{NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nL, 2 * nL)},
                Nd4j.ones(1, nL).muli(forgetGateInit));
    }
    /*The above line initializes the forget gate biases to specified value.
     * See Sutskever PhD thesis, pg19:
     * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
     *  which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
     *  not done, it will be harder to learn long range dependencies because the smaller values of the forget
     *  gates will create a vanishing gradients problem."
     *  http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
     */

    if (initializeParams) {
        //As per standard LSTM
        val fanIn = nL;
        val fanOut = nLast + nL;
        val inputWShape = new long[]{nLast, 4 * nL};
        val recurrentWShape = new long[]{nL, 4 * nL + 3};

        params.put(INPUT_WEIGHT_KEY_FORWARDS, layerConf.getWeightInitFn().init(fanIn, fanOut, inputWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, iwF));
        params.put(RECURRENT_WEIGHT_KEY_FORWARDS, layerConf.getWeightInitFn().init(fanIn, fanOut, recurrentWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, rwF));
        params.put(BIAS_KEY_FORWARDS, bF);
        params.put(INPUT_WEIGHT_KEY_BACKWARDS, layerConf.getWeightInitFn().init(fanIn, fanOut, inputWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, iwR));
        params.put(RECURRENT_WEIGHT_KEY_BACKWARDS, layerConf.getWeightInitFn().init(fanIn, fanOut, recurrentWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, rwR));
        params.put(BIAS_KEY_BACKWARDS, bR);
    } else {
        params.put(INPUT_WEIGHT_KEY_FORWARDS, WeightInitUtil.reshapeWeights(new long[]{nLast, 4 * nL}, iwF));
        params.put(RECURRENT_WEIGHT_KEY_FORWARDS, WeightInitUtil.reshapeWeights(new long[]{nL, 4 * nL + 3}, rwF));
        params.put(BIAS_KEY_FORWARDS, bF);
        params.put(INPUT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.reshapeWeights(new long[]{nLast, 4 * nL}, iwR));
        params.put(RECURRENT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.reshapeWeights(new long[]{nL, 4 * nL + 3}, rwR));
        params.put(BIAS_KEY_BACKWARDS, bR);
    }

    return params;
}
 
Example 19
Source File: TestOptimizers.java    From deeplearning4j with Apache License 2.0
public void testSphereFnOptHelper(OptimizationAlgorithm oa, int numLineSearchIter, int nDimensions) {

    if (PRINT_OPT_RESULTS)
        System.out.println("---------\n Alg= " + oa + ", nIter= " + numLineSearchIter + ", nDimensions= "
                        + nDimensions);

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().maxNumLineSearchIterations(numLineSearchIter)
                    .updater(new Sgd(1e-2))
                    .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
    conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here

    Random rng = new DefaultRandom(12345L);
    org.nd4j.linalg.api.rng.distribution.Distribution dist =
                    new org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(rng, -10, 10);
    Model m = new SphereFunctionModel(nDimensions, dist, conf);
    m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
    double scoreBefore = m.score();
    assertTrue(!Double.isNaN(scoreBefore) && !Double.isInfinite(scoreBefore));
    if (PRINT_OPT_RESULTS) {
        System.out.println("Before:");
        System.out.println(scoreBefore);
        System.out.println(m.params());
    }

    ConvexOptimizer opt = getOptimizer(oa, conf, m);

    opt.setupSearchState(m.gradientAndScore());
    for( int i=0; i<100; i++ ) {
        opt.optimize(LayerWorkspaceMgr.noWorkspaces());
    }
    m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
    double scoreAfter = m.score();

    assertTrue(!Double.isNaN(scoreAfter) && !Double.isInfinite(scoreAfter));
    if (PRINT_OPT_RESULTS) {
        System.out.println("After:");
        System.out.println(scoreAfter);
        System.out.println(m.params());
    }

    //Expected behaviour after optimization:
    //(a) score is better (lower) after optimization.
    //(b) Parameters are closer to minimum after optimization (TODO)
    assertTrue("Score did not improve after optimization (b= " + scoreBefore + " ,a= " + scoreAfter + ")",
                    scoreAfter < scoreBefore);
}
 
Example 20
Source File: TestOptimizers.java    From deeplearning4j with Apache License 2.0
private static void testRosenbrockFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
                int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];

    for (int i = 0; i <= nOptIter; i++) {
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                        .maxNumLineSearchIterations(maxNumLineSearchIter)
                        .updater(new Sgd(1e-1))
                        .stepFunction(new org.deeplearning4j.nn.conf.stepfunctions.NegativeDefaultStepFunction())
                        .layer(new DenseLayer.Builder().nIn(1).nOut(1).build())
                        .build();
        conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here

        Model m = new RosenbrockFunctionModel(100, conf);
        if (i == 0) {
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[0] = m.score(); //Before optimization
        } else {
            ConvexOptimizer opt = getOptimizer(oa, conf, m);
            opt.optimize(LayerWorkspaceMgr.noWorkspaces());
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[i] = m.score();
            assertTrue("NaN or infinite score: " + scores[i],
                            !Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]));
        }
    }

    if (PRINT_OPT_RESULTS) {
        System.out.println("Rosenbrock: Multiple optimization iterations ( " + nOptIter
                        + " opt. iter.) score vs iteration, maxNumLineSearchIter= " + maxNumLineSearchIter + ": "
                        + oa);
        System.out.println(Arrays.toString(scores));
    }
    for (int i = 1; i < scores.length; i++) {
        if (i == 1) {
            assertTrue(scores[i] < scores[i - 1]); //Require at least one step of improvement
        } else {
            assertTrue(scores[i] <= scores[i - 1]);
        }
    }
}