org.deeplearning4j.nn.gradient.Gradient Java Examples

The following examples show how to use org.deeplearning4j.nn.gradient.Gradient. Each example notes the source file and the project it was taken from, so you can look up the original code for more context.
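
Before working through the examples, the short sketch below shows the core of the API as it appears in the code that follows: a Gradient (usually a DefaultGradient) is essentially a map from parameter name to its gradient array. This is a minimal, self-contained sketch, not taken from any of the projects below; the class name GradientSketch and the random arrays are illustrative only.

import java.util.Map;

import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.params.DefaultParamInitializer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class GradientSketch {
    public static void main(String[] args) {
        Gradient gradient = new DefaultGradient();

        // Stand-ins for dL/dW and dL/db
        INDArray weightGrad = Nd4j.rand(4, 3);
        INDArray biasGrad = Nd4j.rand(1, 3);

        // Register gradients under the standard parameter keys, as the layer examples below do
        gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGrad);
        gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGrad);

        // Look up a single entry by key...
        INDArray w = gradient.getGradientFor(DefaultParamInitializer.WEIGHT_KEY);
        System.out.println("W gradient shape: " + java.util.Arrays.toString(w.shape()));

        // ...or iterate over every registered variable
        for (Map.Entry<String, INDArray> e : gradient.gradientForVariable().entrySet()) {
            System.out.println(e.getKey() + " -> mean " + e.getValue().meanNumber());
        }
    }
}

Most layer implementations below follow the same pattern: they fetch a pre-allocated view from gradientViews, fill it in place (via gemm, assign, or sum), and register it under the parameter key.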
Example #1
Source File: SpaceToBatch.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    INDArray input = this.input.castTo(dataType);   //Cast to network dtype if required (no-op if already correct type)

    boolean nchw = layerConf().getFormat() == CNN2DFormat.NCHW;

    INDArray outEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape(), 'c');

    Gradient gradient = new DefaultGradient();

    INDArray epsilonNHWC = nchw ? epsilon.permute(0, 2, 3, 1) : epsilon;
    INDArray outEpsilonNHWC = nchw ? outEpsilon.permute(0, 2, 3, 1) : outEpsilon;

    CustomOp op = DynamicCustomOp.builder("batch_to_space_nd")
            .addInputs(epsilonNHWC, getBlocksArray(), getPaddingArray())
            .addOutputs(outEpsilonNHWC)
            .callInplace(false)
            .build();
    Nd4j.exec(op);

    outEpsilon = backpropDropOutIfPresent(outEpsilon);
    return new Pair<>(gradient, outEpsilon);
}
 
Example #2
Source File: DeepFMOutputLayer.java    From jstarcraft-rns with Apache License 2.0
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM);
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

    Gradient gradient = new DefaultGradient();

    INDArray weightGradView = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    Nd4j.gemm(input, delta, weightGradView, true, false, 1.0, 0.0); // Equivalent to: weightGradView.assign(input.transpose().mmul(delta));
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradView);

    if (hasBias()) {
        INDArray biasGradView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradView, 0); // biasGradView is initialized/zeroed first in sum op
        gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradView);
    }

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
 
Example #3
Source File: ZeroPaddingLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    val inShape = input.shape();

    boolean nchw = layerConf().getDataFormat() == CNN2DFormat.NCHW;
    int hIdx = nchw ? 2 : 1;
    int wIdx = nchw ? 3 : 2;

    INDArray epsNext;
    int[] padding = layerConf().getPadding();
    if (nchw) {
        epsNext = epsilon.get(NDArrayIndex.all(), NDArrayIndex.all(),
                NDArrayIndex.interval(padding[0], padding[0] + inShape[hIdx]),
                NDArrayIndex.interval(padding[2], padding[2] + inShape[wIdx]));
    } else {
        //NHWC
        epsNext = epsilon.get(NDArrayIndex.all(),
                NDArrayIndex.interval(padding[0], padding[0] + inShape[hIdx]),
                NDArrayIndex.interval(padding[2], padding[2] + inShape[wIdx]),
                NDArrayIndex.all());
    }

    epsNext = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsNext);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
 
Example #4
Source File: BaseStatsListener.java    From deeplearning4j with Apache License 2.0
@Override
public void onGradientCalculation(Model model) {
    int iterCount = getModelInfo(model).iterCount;
    if (calcFromGradients() && updateConfig.reportingFrequency() > 0
            && (iterCount == 0 || iterCount % updateConfig.reportingFrequency() == 0)) {
        Gradient g = model.gradient();
        if (updateConfig.collectHistograms(StatsType.Gradients)) {
            gradientHistograms = getHistograms(g.gradientForVariable(), updateConfig.numHistogramBins(StatsType.Gradients));
        }

        if (updateConfig.collectMean(StatsType.Gradients)) {
            meanGradients = calculateSummaryStats(g.gradientForVariable(), StatType.Mean);
        }
        if (updateConfig.collectStdev(StatsType.Gradients)) {
            stdevGradient = calculateSummaryStats(g.gradientForVariable(), StatType.Stdev);
        }
        if (updateConfig.collectMeanMagnitudes(StatsType.Gradients)) {
            meanMagGradients = calculateSummaryStats(g.gradientForVariable(), StatType.MeanMagnitude);
        }
    }
}
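
The listener above just reads the per-variable map returned by Gradient.gradientForVariable() and summarizes it. As a rough illustration of the kind of summary being collected, here is a hypothetical helper (not the listener's internal code) that computes the mean gradient magnitude per variable:

import java.util.HashMap;
import java.util.Map;

import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.ops.transforms.Transforms;

public class GradientStatsSketch {
    /** Hypothetical helper: mean absolute value of each gradient array, keyed by variable name. */
    public static Map<String, Double> meanMagnitudes(Gradient g) {
        Map<String, Double> out = new HashMap<>();
        for (Map.Entry<String, INDArray> e : g.gradientForVariable().entrySet()) {
            // abs(..., true) works on a copy so the stored gradient is left untouched
            double meanMag = Transforms.abs(e.getValue(), true).meanNumber().doubleValue();
            out.put(e.getKey(), meanMag);
        }
        return out;
    }
}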
 
Example #5
Source File: BasePretrainNetwork.java    From deeplearning4j with Apache License 2.0
protected Gradient createGradient(INDArray wGradient, INDArray vBiasGradient, INDArray hBiasGradient) {
    Gradient ret = new DefaultGradient(gradientsFlattened);
    // The order of the following statements matters! The gradient is flattened and applied to the
    // flattened params in this order.
    // The arrays need to be views, with the current Updater implementation

    //TODO: optimize this to avoid the assign calls
    INDArray wg = gradientViews.get(PretrainParamInitializer.WEIGHT_KEY);
    wg.assign(wGradient);

    INDArray hbg = gradientViews.get(PretrainParamInitializer.BIAS_KEY);
    hbg.assign(hBiasGradient);

    INDArray vbg = gradientViews.get(PretrainParamInitializer.VISIBLE_BIAS_KEY);
    vbg.assign(vBiasGradient);

    ret.gradientForVariable().put(PretrainParamInitializer.WEIGHT_KEY, wg);
    ret.gradientForVariable().put(PretrainParamInitializer.BIAS_KEY, hbg);
    ret.gradientForVariable().put(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbg);

    return ret;
}
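
The comment in createGradient above stresses that the per-parameter arrays must be views into the flattened gradient buffer, so that assigning into them also updates the flat array the Updater consumes. A minimal ND4J illustration of that idea follows; the buffer split and the variable names are made up for this sketch:

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;

public class GradientViewSketch {
    public static void main(String[] args) {
        // One flat buffer standing in for gradientsFlattened: 6 weight values followed by 4 bias values
        INDArray flat = Nd4j.zeros(1, 10);

        // Contiguous sub-arrays obtained via get(...) share memory with 'flat'
        INDArray weightView = flat.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 6));
        INDArray biasView = flat.get(NDArrayIndex.all(), NDArrayIndex.interval(6, 10));

        // Writing into the views (as createGradient does with assign) also fills the flat buffer
        weightView.assign(1.0);
        biasView.assign(2.0);

        System.out.println(flat);   // first six entries are 1.0, last four are 2.0
    }
}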
 
Example #6
Source File: CenterLossOutputLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    Pair<Gradient, INDArray> pair = getGradientsAndDelta(preOutput2d(true, workspaceMgr), workspaceMgr); //Returns Gradient and delta^(this), not Gradient and epsilon^(this-1)
    INDArray delta = pair.getSecond();

    // centers
    INDArray centers = params.get(CenterLossParamInitializer.CENTER_KEY);
    INDArray l = labels.castTo(centers.dataType());     //Ensure correct dtype (same as params); no-op if already correct dtype
    INDArray centersForExamples = l.mmul(centers);
    INDArray dLcdai = input.sub(centersForExamples);

    INDArray w = getParamWithNoise(CenterLossParamInitializer.WEIGHT_KEY, true, workspaceMgr);

    INDArray epsilonNext = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, w.dataType(), new long[]{w.size(0), delta.size(0)}, 'f');
    epsilonNext = w.mmuli(delta.transpose(), epsilonNext).transpose();
    double lambda = layerConf().getLambda();
    epsilonNext.addi(dLcdai.muli(lambda)); // add center loss here

    weightNoiseParams.clear();

    return new Pair<>(pair.getFirst(), epsilonNext);
}
 
Example #7
Source File: TestGraphNodes.java    From deeplearning4j with Apache License 2.0
@Test
public void testStackNode() {
    Nd4j.getRandom().setSeed(12345);
    GraphVertex unstack = new StackVertex(null, "", -1, Nd4j.dataType());

    INDArray in1 = Nd4j.rand(5, 2);
    INDArray in2 = Nd4j.rand(5, 2);
    INDArray in3 = Nd4j.rand(5, 2);
    unstack.setInputs(in1, in2, in3);
    INDArray out = unstack.doForward(false, LayerWorkspaceMgr.noWorkspaces());
    assertEquals(in1, out.get(NDArrayIndex.interval(0, 5), NDArrayIndex.all()));
    assertEquals(in2, out.get(NDArrayIndex.interval(5, 10), NDArrayIndex.all()));
    assertEquals(in3, out.get(NDArrayIndex.interval(10, 15), NDArrayIndex.all()));

    unstack.setEpsilon(out);
    Pair<Gradient, INDArray[]> b = unstack.doBackward(false, LayerWorkspaceMgr.noWorkspaces());

    assertEquals(in1, b.getSecond()[0]);
    assertEquals(in2, b.getSecond()[1]);
    assertEquals(in3, b.getSecond()[2]);
}
 
Example #8
Source File: SubsetVertex.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray[]> doBackward(boolean tbptt, LayerWorkspaceMgr workspaceMgr) {
    if (!canDoBackward())
        throw new IllegalStateException("Cannot do backward pass: error not set");

    INDArray out = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, epsilon.dataType(), forwardShape);
    switch (forwardShape.length) {
        case 2:
            out.put(new INDArrayIndex[] {NDArrayIndex.all(), NDArrayIndex.interval(from, to, true)}, epsilon);
            break;
        case 3:
            out.put(new INDArrayIndex[] {NDArrayIndex.all(), NDArrayIndex.interval(from, to, true),
                            NDArrayIndex.all()}, epsilon);
            break;
        case 4:
            out.put(new INDArrayIndex[] {NDArrayIndex.all(), NDArrayIndex.interval(from, to, true),
                            NDArrayIndex.all(), NDArrayIndex.all()}, epsilon);
            break;
        default:
            throw new RuntimeException("Invalid activation rank"); //Should never happen
    }
    return new Pair<>(null, new INDArray[] {out});
}
 
Example #9
Source File: FrozenLayerWithBackprop.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    INDArray backpropEpsilon = underlying.backpropGradient(epsilon, workspaceMgr).getSecond();
    //backprop might have already changed the gradient view (like BaseLayer and BaseOutputLayer do)
    //so we want to put it back to zeroes
    INDArray gradientView = underlying.getGradientsViewArray();
    if(gradientView != null){
        gradientView.assign(0);
    }
    return new Pair<>(zeroGradient, backpropEpsilon);
}
 
Example #10
Source File: SubsamplingLayerTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testSubSampleLayerMaxBackprop() throws Exception {
    INDArray expectedContainedEpsilonInput =
                    Nd4j.create(new double[] {1., 1., 1., 1., 1., 1., 1., 1.}, new int[] {1, 2, 2, 2}).castTo(Nd4j.defaultFloatingPointType());

    INDArray expectedContainedEpsilonResult = Nd4j.create(new double[] {0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1.,
                    0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0.},
                    new int[] {1, 2, 4, 4}).castTo(Nd4j.defaultFloatingPointType());

    INDArray input = getContainedData();

    Layer layer = getSubsamplingLayer(SubsamplingLayer.PoolingType.MAX);
    layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces());

    Pair<Gradient, INDArray> containedOutput = layer.backpropGradient(expectedContainedEpsilonInput, LayerWorkspaceMgr.noWorkspaces());
    assertEquals(expectedContainedEpsilonResult, containedOutput.getSecond());
    assertEquals(null, containedOutput.getFirst().getGradientFor("W"));
    assertEquals(expectedContainedEpsilonResult.shape().length, containedOutput.getSecond().shape().length);

    INDArray input2 = getData();
    layer.activate(input2, false, LayerWorkspaceMgr.noWorkspaces());
    long depth = input2.size(1);

    epsilon = Nd4j.ones(5, depth, featureMapHeight, featureMapWidth);

    Pair<Gradient, INDArray> out = layer.backpropGradient(epsilon, LayerWorkspaceMgr.noWorkspaces());
    assertEquals(input.shape().length, out.getSecond().shape().length);
    assertEquals(depth, out.getSecond().size(1)); // channels retained
}
 
Example #11
Source File: Upsampling1DTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testUpsampling1DBackprop() throws Exception {
    INDArray expectedContainedEpsilonInput =
                    Nd4j.create(new double[] {1., 3., 2., 6., 7., 2., 5., 5.},
                            new int[] {1, 1, 8});

    INDArray expectedContainedEpsilonResult = Nd4j.create(new double[] {4., 8., 9., 10.},
                    new int[] {1, 1, 4});

    INDArray input = getContainedData();

    Layer layer = getUpsampling1DLayer();
    layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces());

    Pair<Gradient, INDArray> containedOutput = layer.backpropGradient(expectedContainedEpsilonInput, LayerWorkspaceMgr.noWorkspaces());

    assertEquals(expectedContainedEpsilonResult, containedOutput.getSecond());
    assertEquals(null, containedOutput.getFirst().getGradientFor("W"));
    assertEquals(expectedContainedEpsilonResult.shape().length, containedOutput.getSecond().shape().length);

    INDArray input2 = getData();
    layer.activate(input2, false, LayerWorkspaceMgr.noWorkspaces());
    val depth = input2.size(1);

    epsilon = Nd4j.ones(5, depth, outputLength);

    Pair<Gradient, INDArray> out = layer.backpropGradient(epsilon, LayerWorkspaceMgr.noWorkspaces());
    assertEquals(input.shape().length, out.getSecond().shape().length);
    assertEquals(depth, out.getSecond().size(1));
}
 
Example #12
Source File: DropoutLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    INDArray delta = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, epsilon);

    if (maskArray != null) {
        delta.muliColumnVector(maskArray);
    }

    Gradient ret = new DefaultGradient();
    delta = backpropDropOutIfPresent(delta);
    return new Pair<>(ret, delta);
}
 
Example #13
Source File: LayerVertex.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray[]> doBackward(boolean tbptt, LayerWorkspaceMgr workspaceMgr) {
    if (!canDoBackward()) {
        if(inputs == null || inputs[0] == null){
            throw new IllegalStateException("Cannot do backward pass: inputs not set. Layer: \"" + vertexName
                    + "\" (idx " + vertexIndex + "), numInputs: " + getNumInputArrays());
        } else {
            throw new IllegalStateException("Cannot do backward pass: all epsilons not set. Layer \"" + vertexName
                    + "\" (idx " + vertexIndex + "), numInputs :" + getNumInputArrays() + "; numOutputs: "
                    + getNumOutputConnections());
        }
    }

    //Edge case: output layer - never did forward pass hence layer.setInput was never called...
    if(!setLayerInput){
        applyPreprocessorAndSetInput(workspaceMgr);
    }

    Pair<Gradient, INDArray> pair;
    if (tbptt && layer instanceof RecurrentLayer) {
        //Truncated BPTT for recurrent layers
        pair = ((RecurrentLayer) layer).tbpttBackpropGradient(epsilon,
                        graph.getConfiguration().getTbpttBackLength(), workspaceMgr);
    } else {
        //Normal backprop
        pair = layer.backpropGradient(epsilon, workspaceMgr); //epsTotal may be null for OutputLayers
    }

    if (layerPreProcessor != null) {
        INDArray eps = pair.getSecond();
        eps = layerPreProcessor.backprop(eps, graph.batchSize(), workspaceMgr);
        pair.setSecond(eps);
    }

    //Layers always have single activations input -> always have single epsilon output during backprop
    return new Pair<>(pair.getFirst(), new INDArray[] {pair.getSecond()});
}
 
Example #14
Source File: ZeroPadding1DLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    val inShape = input.shape();

    INDArray epsNext = epsilon.get(NDArrayIndex.all(), NDArrayIndex.all(),
            NDArrayIndex.interval(padding[0], padding[0] + inShape[2]));

    return new Pair<>((Gradient) new DefaultGradient(), workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsNext));
}
 
Example #15
Source File: RepeatVectorTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testRepeatVector() {

    double[] arr = new double[] {1., 2., 3., 1., 2., 3., 1., 2., 3., 1., 2., 3.};
    INDArray expectedOut = Nd4j.create(arr, new long[] {1, 3, REPEAT}, 'f');
    INDArray input = Nd4j.create(new double[] {1., 2., 3.}, new long[] {1, 3});
    Layer layer = getRepeatVectorLayer();

    INDArray output = layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces());
    assertTrue(Arrays.equals(expectedOut.shape(), output.shape()));
    assertEquals(expectedOut, output);

    INDArray epsilon = Nd4j.ones(1,3,4);

    Pair<Gradient, INDArray> out = layer.backpropGradient(epsilon, LayerWorkspaceMgr.noWorkspaces());
    INDArray outEpsilon = out.getSecond();
    INDArray expectedEpsilon = Nd4j.create(new double[] {4., 4., 4.}, new long[] {1, 3});
    assertEquals(expectedEpsilon, outEpsilon);
}
 
Example #16
Source File: LBFGS.java    From deeplearning4j with Apache License 2.0
@Override
public void setupSearchState(Pair<Gradient, Double> pair) {
    super.setupSearchState(pair);
    INDArray params = (INDArray) searchState.get(PARAMS_KEY);
    searchState.put("s", new LinkedList<INDArray>()); // holds parameters differences
    searchState.put("y", new LinkedList<INDArray>()); // holds gradients differences
    searchState.put("rho", new LinkedList<Double>());
    searchState.put("oldparams", params.dup());

}
 
Example #17
Source File: EmbeddingLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    //If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent)
    INDArray z = preOutput(true, workspaceMgr);
    INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst(); //TODO handle activation function params

    if (maskArray != null) {
        delta.muliColumnVector(maskArray.castTo(dataType));
    }

    INDArray weightGradients = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    weightGradients.assign(0);

    long[] indexes = new long[(int) input.length()];
    for (int i = 0; i < indexes.length; i++) {
        indexes[i] = input.getInt(i, 0);
    }

    INDArray indices = Nd4j.createFromArray(indexes);
    Nd4j.scatterUpdate(org.nd4j.linalg.api.ops.impl.scatter.ScatterUpdate.UpdateOp.ADD, weightGradients, indices, delta, DIM_1);


    Gradient ret = new DefaultGradient();
    ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradients);

    if(hasBias()) {
        INDArray biasGradientsView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradientsView, 0); //biasGradientView is initialized/zeroed first in sum op
        ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradientsView);
    }

    return new Pair<>(ret, null); //Don't bother returning epsilons: no layer below this one...
}
 
Example #18
Source File: DQN.java    From deeplearning4j with Apache License 2.0
public Gradient[] gradient(INDArray input, INDArray labels) {
    mln.setInput(input);
    mln.setLabels(labels);
    mln.computeGradientAndScore();
    Collection<TrainingListener> iterationListeners = mln.getListeners();
    if (iterationListeners != null && iterationListeners.size() > 0) {
        for (TrainingListener l : iterationListeners) {
            l.onGradientCalculation(mln);
        }
    }
    return new Gradient[] {mln.gradient()};
}
 
Example #19
Source File: FrozenLayerWithBackprop.java    From deeplearning4j with Apache License 2.0
@Override
public void update(Gradient gradient) {
    if (!logUpdate) {
        OneTimeLogger.info(log, "Frozen layers will not be updated. Warning will be issued only once per instance");
        logUpdate = true;
    }
    //no op
}
 
Example #20
Source File: Cropping2DLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    val inShape = input.shape();
    INDArray epsNext = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, input.dataType(), inShape, 'c');
    INDArray epsNextSubset = inputSubset(epsNext);
    epsNextSubset.assign(epsilon);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
 
Example #21
Source File: Cnn3DLossLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if (input.rank() != 5)
        throw new UnsupportedOperationException(
                "Input is not rank 5. Got input with rank " + input.rank() + " " + layerId() + " with shape "
                        + Arrays.toString(input.shape()) + " - expected shape [minibatch,channels,depth,height,width]");
    if (labels == null)
        throw new IllegalStateException("Labels are not set (null)");

    INDArray input2d = ConvolutionUtils.reshape5dTo2d(layerConf().getDataFormat(), input, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray labels2d = ConvolutionUtils.reshape5dTo2d(layerConf().getDataFormat(), labels, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray maskReshaped = ConvolutionUtils.reshapeCnn3dMask(layerConf().getDataFormat(), maskArray, labels, workspaceMgr, ArrayType.FF_WORKING_MEM);

    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta2d = lossFunction.computeGradient(labels2d, input2d.dup(input2d.ordering()), layerConf().getActivationFn(), maskReshaped);
    delta2d = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta2d);

    long n = input.size(0);
    long d, h, w, c;
    if(layerConf().getDataFormat() == Convolution3D.DataFormat.NDHWC){
        d = input.size(1);
        h = input.size(2);
        w = input.size(3);
        c = input.size(4);
    } else {
        d = input.size(2);
        h = input.size(3);
        w = input.size(4);
        c = input.size(1);
    }
    INDArray delta5d = ConvolutionUtils.reshape2dTo5d(layerConf().getDataFormat(), delta2d, n, d, h, w, c, workspaceMgr, ArrayType.ACTIVATION_GRAD);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();
    return new Pair<>(gradient, delta5d);
}
 
Example #22
Source File: L2Vertex.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray[]> doBackward(boolean tbptt, LayerWorkspaceMgr workspaceMgr) {
    if (!canDoBackward())
        throw new IllegalStateException("Cannot do backward pass: error not set");

    INDArray a = inputs[0];
    INDArray b = inputs[1];
    INDArray out = doForward(tbptt, workspaceMgr);
    Transforms.max(out, eps, false); // in case of 0

    INDArray dLdlambda = epsilon; //dL/dlambda aka 'epsilon' - from layer above

    INDArray sNegHalf = out.rdiv(1.0); //s^(-1/2) = 1.0 / s^(1/2) = 1.0 / out

    INDArray diff;
    try(MemoryWorkspace ws = workspaceMgr.notifyScopeBorrowed(ArrayType.ACTIVATION_GRAD)){
        diff = a.sub(b);
    }

    INDArray first = dLdlambda.mul(sNegHalf); //Column vector for all cases

    INDArray dLda;
    INDArray dLdb;
    if (a.rank() == 2) {
        //2d case (MLPs etc)
        dLda = diff.muliColumnVector(first);
        try(MemoryWorkspace ws = workspaceMgr.notifyScopeBorrowed(ArrayType.ACTIVATION_GRAD)) {
            dLdb = dLda.neg();
        }
    } else {
        //RNN and CNN case - Broadcast along dimension 0
        dLda = Nd4j.getExecutioner().exec(new BroadcastMulOp(diff, first, diff, 0));
        try(MemoryWorkspace ws = workspaceMgr.notifyScopeBorrowed(ArrayType.ACTIVATION_GRAD)) {
            dLdb = dLda.neg();
        }
    }

    return new Pair<>(null, new INDArray[] {dLda, dLdb});
}
 
Example #23
Source File: ReshapeVertex.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray[]> doBackward(boolean tbptt, LayerWorkspaceMgr workspaceMgr) {
    if (!canDoBackward())
        throw new IllegalStateException("Cannot do backward pass: errors not set");

    INDArray[] out = new INDArray[1];
    out[0] = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, epsilon.reshape(order, inputs[0].shape()));
    return new Pair<>(null, out);
}
 
Example #24
Source File: Upsampling2D.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    CNN2DFormat format = getFormat();
    boolean nchw = format == CNN2DFormat.NCHW;

    long miniBatch = input.size(0);
    long inDepth = input.size(nchw ? 1 : 3);
    long inH = input.size(nchw ? 2 : 1);
    long inW = input.size(nchw ? 3 : 2);

    long[] epsShape = nchw ? new long[]{miniBatch, inDepth, inH, inW} : new long[]{miniBatch, inH, inW, inDepth};
    INDArray epsOut =  workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, epsilon.dataType(), epsShape, 'c');

    Gradient gradient = new DefaultGradient();

    CustomOp op = DynamicCustomOp.builder("upsampling_bp")
            .addIntegerArguments(nchw ? 1 : 0)      //1=NCHW, 0=NHWC
            .addInputs(input, epsilon)
            .addOutputs(epsOut)
            .callInplace(false)
            .build();
    Nd4j.getExecutioner().exec(op);

    epsOut = backpropDropOutIfPresent(epsOut);

    return new Pair<>(gradient, epsOut);
}
 
Example #25
Source File: RnnDataFormatTests.java    From deeplearning4j with Apache License 2.0
private static List<String> differentGrads(Gradient g1, Gradient g2){
    List<String> differs = new ArrayList<>();
    Map<String,INDArray> m1 = g1.gradientForVariable();
    Map<String,INDArray> m2 = g2.gradientForVariable();
    for(String s : m1.keySet()){
        INDArray a1 = m1.get(s);
        INDArray a2 = m2.get(s);
        if(!a1.equals(a2)){
            differs.add(s);
        }
    }
    return differs;
}
 
Example #26
Source File: LossLayer.java    From deeplearning4j with Apache License 2.0
/** Returns a pair of (Gradient, delta) given preOut */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta = lossFunction.computeGradient(getLabels2d(), preOut, layerConf().getActivationFn(), maskArray);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
 
Example #27
Source File: LSTMHelper.java    From deeplearning4j with Apache License 2.0
Pair<Gradient, INDArray> backpropGradient(final NeuralNetConfiguration conf, final IActivation gateActivationFn,
                final INDArray input, final INDArray recurrentWeights, //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG]
                final INDArray inputWeights, //Shape: [n^(L-1),4*hiddenLayerSize]; order: [wi,wf,wo,wg]
                final INDArray epsilon, final boolean truncatedBPTT, final int tbpttBackwardLength,
                final FwdPassReturn fwdPass, final boolean forwards, final String inputWeightKey,
                final String recurrentWeightKey, final String biasWeightKey,
                final Map<String, INDArray> gradientViews, INDArray maskArray, //Input mask: should only be used with bidirectional RNNs + variable length
                final boolean hasPeepholeConnections, //True for GravesLSTM, false for LSTM
                final LayerWorkspaceMgr workspaceMgr);
 
Example #28
Source File: FrozenLayer.java    From deeplearning4j with Apache License 2.0
@Override
public void update(Gradient gradient) {
    if (!logUpdate) {
        OneTimeLogger.info(log, "Frozen layers will not be updated. Warning will be issued only once per instance");
        logUpdate = true;
    }
    //no op
}
 
Example #29
Source File: Upsampling1D.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    int[] size = ((BaseUpsamplingLayer) layerConf()).getSize();
    epsilon = epsilon.reshape(epsilon.size(0), epsilon.size(1), epsilon.size(2), 1);
    // we replicate the error term times "size" so that backprop works properly on it
    epsilon = epsilon.repeat(3, size[0]);

    INDArray originalInput = input;
    input = input.castTo(dataType).reshape(input.size(0), input.size(1), input.size(2), 1);

    long miniBatch = input.size(0);
    long inDepth = input.size(1);
    long inH = input.size(2);
    long inW = input.size(3);


    INDArray outEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), miniBatch * inDepth * inH * inW);
    INDArray reshapedEpsilon = outEpsilon.reshape('c', miniBatch, inDepth, inH, inW);

    int[] intArgs = new int[] {1}; // 1 is for NCHW

    CustomOp op = DynamicCustomOp.builder("upsampling_bp")
            .addIntegerArguments(intArgs)
            .addInputs(input, epsilon)
            .addOutputs(reshapedEpsilon)
            .callInplace(false)
            .build();
    Nd4j.getExecutioner().exec(op);

    Gradient gradient = new DefaultGradient();

    reshapedEpsilon = reshapedEpsilon.slice(0, 3);
    input = originalInput;

    // Since we aggregate the gradient across "size" slices, we need to normalize afterwards.
    return new Pair<>(gradient, reshapedEpsilon.divi(size[0]));
}
 
Example #30
Source File: Subsampling1DLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    if (epsilon.rank() != 3)
        throw new DL4JInvalidInputException("Got rank " + epsilon.rank()
                        + " array as epsilon for Subsampling1DLayer backprop with shape "
                        + Arrays.toString(epsilon.shape())
                        + ". Expected rank 3 array with shape [minibatchSize, features, length]. " + layerId());
    if(maskArray != null){
        INDArray maskOut = feedForwardMaskArray(maskArray, MaskState.Active, (int)epsilon.size(0)).getFirst();
        Preconditions.checkState(epsilon.size(0) == maskOut.size(0) && epsilon.size(2) == maskOut.size(1),
                "Activation gradients dimensions (0,2) and mask dimensions (0,1) don't match: Activation gradients %s, Mask %s",
                epsilon.shape(), maskOut.shape());
        Broadcast.mul(epsilon, maskOut, epsilon, 0, 2);
    }

    // add singleton fourth dimension to input and next layer's epsilon
    INDArray origInput = input;
    input = input.castTo(dataType).reshape(input.size(0), input.size(1), input.size(2), 1);
    epsilon = epsilon.reshape(epsilon.size(0), epsilon.size(1), epsilon.size(2), 1);

    // call 2D SubsamplingLayer's backpropGradient method
    Pair<Gradient, INDArray> gradientEpsNext = super.backpropGradient(epsilon, workspaceMgr);
    INDArray epsNext = gradientEpsNext.getSecond();

    // remove singleton fourth dimension from input and current epsilon
    input = origInput;
    epsNext = epsNext.reshape(epsNext.size(0), epsNext.size(1), epsNext.size(2));

    return new Pair<>(gradientEpsNext.getFirst(), epsNext);
}