Java Code Examples for org.nd4j.linalg.factory.Nd4j#gemm()

The following examples show how to use org.nd4j.linalg.factory.Nd4j#gemm(). They are taken from open-source projects; the source file and project each example comes from are listed above it.
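Before the individual examples, here is a minimal usage sketch (not taken from any of the projects below; class and variable names are illustrative) of the simpler out-of-place overload, Nd4j.gemm(a, b, transposeA, transposeB), which returns a new array equal to op(a) x op(b). The layer implementations below mostly use the second, in-place overload, Nd4j.gemm(a, b, c, transposeA, transposeB, alpha, beta), which writes alpha * op(a) x op(b) + beta * c directly into a pre-allocated, 'f'-ordered target such as a gradient view.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class GemmIntroSketch {
    public static void main(String[] args) {
        // 4x3 and 4x5 inputs; with transposeA = true the product a^T x b has shape [3, 5].
        INDArray a = Nd4j.rand(4, 3);
        INDArray b = Nd4j.rand(4, 5);

        // Out-of-place overload: allocates and returns a new array holding op(a) x op(b).
        INDArray c = Nd4j.gemm(a, b, true, false);

        // Should match the naive (but allocation-heavier) formulation.
        System.out.println(c.equals(a.transpose().mmul(b)));
    }
}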
Example 1
Source File: DeepFMOutputLayer.java    From jstarcraft-rns with Apache License 2.0
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM);
    // INDArray delta = lossFunction.computeGradient(labels2d, preOut,
    // layerConf().getActivationFunction(), maskArray);
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

    Gradient gradient = new DefaultGradient();

    INDArray weightGradView = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    Nd4j.gemm(input, delta, weightGradView, true, false, 1.0, 0.0); // Equivalent to: weightGradView.assign(input.transpose().mmul(delta));
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradView);

    if (hasBias()) {
        INDArray biasGradView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradView, 0); // biasGradView is initialized/zeroed first in sum op
        gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradView);
    }

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
 
Example 2
Source File: BaseOutputLayer.java    From deeplearning4j with Apache License 2.0
/** Returns tuple: {Gradient,Delta,Output} given preOut */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM);
    //INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFunction(), maskArray);
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

    Gradient gradient = new DefaultGradient();

    INDArray weightGradView = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    Nd4j.gemm(input.castTo(weightGradView.dataType()), delta, weightGradView, true, false, 1.0, 0.0); //Equivalent to:  weightGradView.assign(input.transpose().mmul(delta));         //TODO can we avoid cast?
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradView);

    if(hasBias()){
        INDArray biasGradView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradView, 0); //biasGradView is initialized/zeroed first in sum op
        gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradView);
    }

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
 
Example 3
Source File: BlasTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testGemmInvalid1() {
    final INDArray a = Nd4j.rand(3, 4);
    final INDArray b = Nd4j.rand(4, 5);

    final INDArray target = Nd4j.zeros(new int[]{2, 3, 5}, 'f');
    final INDArray view = target.tensorAlongDimension(0, 1, 2);

    try {
        Nd4j.gemm(a, b, view, false, false, 1.0, 0.0);
        fail("Expected exception");
    } catch (IllegalArgumentException e) {
        assertTrue(e.getMessage().contains("view"));
    }
}
 
Example 4
Source File: BlasTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testGemmInvalid3() {
    final INDArray a = Nd4j.rand(4, 3);
    final INDArray b = Nd4j.rand(4, 5);

    final INDArray target = Nd4j.zeros(new int[]{2, 3, 5}, 'f');
    final INDArray view = target.tensorAlongDimension(0, 1, 2);

    try {
        Nd4j.gemm(a, b, view, true, false, 1.0, 0.0);
        fail("Expected exception");
    } catch (IllegalArgumentException e) {
        assertTrue(e.getMessage().contains("view"));
    }
}
 
Example 5
Source File: BlasTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testGemm1() {
    final INDArray a = Nd4j.rand(4, 3);
    final INDArray b = Nd4j.rand(4, 5);

    final INDArray result = a.transpose().mmul(b);
    final INDArray result2 = Nd4j.gemm(a, b, true, false);

    assertEquals(result, result2);
}
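
Examples 3 and 4 above show that gemm rejects a target that is a view of a larger array; Example 5 sidesteps the issue by using the out-of-place overload. As a hedged counterpart sketch (not from BlasTests itself), the in-place call below uses a target that satisfies the constraints the CheckUtil helpers in Examples 7 and 8 assert: freshly allocated, 'f'-ordered, offset 0, and shaped [3, 5] for a^T x b.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class GemmValidTargetSketch {
    public static void main(String[] args) {
        INDArray a = Nd4j.rand(4, 3);
        INDArray b = Nd4j.rand(4, 5);

        // A valid in-place target: 'f'-ordered, offset 0, not a view.
        INDArray c = Nd4j.zeros(new int[]{3, 5}, 'f');
        INDArray out = Nd4j.gemm(a, b, c, true, false, 1.0, 0.0);

        // gemm writes into c and returns the same array (no copy), which CheckUtil also relies on.
        System.out.println(out == c);
        System.out.println(c.equals(a.transpose().mmul(b)));
    }
}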
 
Example 6
Source File: CDAELayer.java    From jstarcraft-rns with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    // If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or
    // equivalent)
    INDArray z = preOutput(true, workspaceMgr); // Note: preOutput(INDArray) can't be used here, as it does a setInput(input)
                                                // and resets the 'appliedDropout' flag
    // INDArray activationDerivative =
    // Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(conf().getLayer().getActivationFunction(),
    // z).derivative());
    // INDArray activationDerivative =
    // conf().getLayer().getActivationFn().getGradient(z);
    // INDArray delta = epsilon.muli(activationDerivative);
    INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst(); // TODO handle activation function params

    if (maskArray != null) {
        applyMask(delta);
    }

    Gradient ret = new DefaultGradient();

    INDArray weightGrad = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY); // f order
    Nd4j.gemm(input, delta, weightGrad, true, false, 1.0, 0.0);
    ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGrad);

    INDArray userWeightGrad = gradientViews.get(CDAEParameter.USER_KEY);
    userWeightGrad.assign(delta);
    ret.gradientForVariable().put(CDAEParameter.USER_KEY, userWeightGrad);

    if (hasBias()) {
        INDArray biasGrad = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGrad, 0); // biasGrad is initialized/zeroed first
        ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGrad);
    }

    INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, true, workspaceMgr);
    INDArray epsilonNext = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, new long[] { W.size(0), delta.size(0) }, 'f');
    epsilonNext = W.mmuli(delta.transpose(), epsilonNext).transpose(); // W.mmul(delta.transpose()).transpose();

    weightNoiseParams.clear();

    epsilonNext = backpropDropOutIfPresent(epsilonNext);
    return new Pair<>(ret, epsilonNext);
}
 
Example 7
Source File: CheckUtil.java    From nd4j with Apache License 2.0
public static boolean checkGemm(INDArray a, INDArray b, INDArray c, boolean transposeA, boolean transposeB,
                double alpha, double beta, double maxRelativeDifference, double minAbsDifference) {
    long commonDimA = (transposeA ? a.rows() : a.columns());
    long commonDimB = (transposeB ? b.columns() : b.rows());
    if (commonDimA != commonDimB)
        throw new IllegalArgumentException("Common dimensions don't match: a.shape=" + Arrays.toString(a.shape())
                        + ", b.shape=" + Arrays.toString(b.shape()) + ", tA=" + transposeA + ", tb=" + transposeB);
    long outRows = (transposeA ? a.columns() : a.rows());
    long outCols = (transposeB ? b.rows() : b.columns());
    if (c.rows() != outRows || c.columns() != outCols)
        throw new IllegalArgumentException("C does not match outRows or outCols");
    if (c.offset() != 0 || c.ordering() != 'f')
        throw new IllegalArgumentException("Invalid c");

    INDArray aConvert = transposeA ? a.transpose() : a;
    RealMatrix rmA = convertToApacheMatrix(aConvert);
    INDArray bConvert = transposeB ? b.transpose() : b;
    RealMatrix rmB = convertToApacheMatrix(bConvert);
    RealMatrix rmC = convertToApacheMatrix(c);
    RealMatrix rmExpected = rmA.scalarMultiply(alpha).multiply(rmB).add(rmC.scalarMultiply(beta));
    INDArray cCopy1 = Nd4j.create(c.shape(), 'f');
    cCopy1.assign(c);
    INDArray cCopy2 = Nd4j.create(c.shape(), 'f');
    cCopy2.assign(c);

    INDArray out = Nd4j.gemm(a, b, c, transposeA, transposeB, alpha, beta);
    if (out != c) {
        System.out.println("Returned different array than c");
        return false;
    }
    if (!checkShape(rmExpected, out))
        return false;
    boolean ok = checkEntries(rmExpected, out, maxRelativeDifference, minAbsDifference);
    if (!ok) {
        INDArray aCopy = Shape.toOffsetZeroCopy(a);
        INDArray bCopy = Shape.toOffsetZeroCopy(b);
        INDArray onCopies = Nd4j.gemm(aCopy, bCopy, cCopy1, transposeA, transposeB, alpha, beta);
        printGemmFailureDetails(a, b, cCopy2, transposeA, transposeB, alpha, beta, rmExpected, out, onCopies);
    }
    return ok;
}
 
Example 8
Source File: CheckUtil.java    From deeplearning4j with Apache License 2.0
public static boolean checkGemm(INDArray a, INDArray b, INDArray c, boolean transposeA, boolean transposeB,
                double alpha, double beta, double maxRelativeDifference, double minAbsDifference) {
    long commonDimA = (transposeA ? a.rows() : a.columns());
    long commonDimB = (transposeB ? b.columns() : b.rows());
    if (commonDimA != commonDimB)
        throw new IllegalArgumentException("Common dimensions don't match: a.shape=" + Arrays.toString(a.shape())
                        + ", b.shape=" + Arrays.toString(b.shape()) + ", tA=" + transposeA + ", tb=" + transposeB);
    long outRows = (transposeA ? a.columns() : a.rows());
    long outCols = (transposeB ? b.rows() : b.columns());
    if (c.rows() != outRows || c.columns() != outCols)
        throw new IllegalArgumentException("C does not match outRows or outCols");
    if (c.offset() != 0 || c.ordering() != 'f')
        throw new IllegalArgumentException("Invalid c");

    INDArray aConvert = transposeA ? a.transpose() : a;
    RealMatrix rmA = convertToApacheMatrix(aConvert);
    INDArray bConvert = transposeB ? b.transpose() : b;
    RealMatrix rmB = convertToApacheMatrix(bConvert);
    RealMatrix rmC = convertToApacheMatrix(c);
    RealMatrix rmExpected = rmA.scalarMultiply(alpha).multiply(rmB).add(rmC.scalarMultiply(beta));
    INDArray cCopy1 = Nd4j.create(c.shape(), 'f');
    cCopy1.assign(c);
    INDArray cCopy2 = Nd4j.create(c.shape(), 'f');
    cCopy2.assign(c);

    INDArray out = Nd4j.gemm(a, b, c, transposeA, transposeB, alpha, beta);
    if (out != c) {
        System.out.println("Returned different array than c");
        return false;
    }
    if (!checkShape(rmExpected, out))
        return false;
    boolean ok = checkEntries(rmExpected, out, maxRelativeDifference, minAbsDifference);
    if (!ok) {
        INDArray aCopy = Shape.toOffsetZeroCopy(a);
        INDArray bCopy = Shape.toOffsetZeroCopy(b);
        INDArray onCopies = Nd4j.gemm(aCopy, bCopy, cCopy1, transposeA, transposeB, alpha, beta);
        printGemmFailureDetails(a, b, cCopy2, transposeA, transposeB, alpha, beta, rmExpected, out, onCopies);
    }
    return ok;
}
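
CheckUtil validates gemm against the reference alpha * op(A) x op(B) + beta * C. A non-zero beta is what makes that last term matter: as a small illustrative sketch (not from CheckUtil; names are illustrative), the call below accumulates a product on top of the existing contents of c instead of overwriting them.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class GemmAccumulateSketch {
    public static void main(String[] args) {
        INDArray a = Nd4j.rand(3, 4);
        INDArray b = Nd4j.rand(4, 5);

        // Existing 'f'-ordered contents that should be kept (beta = 1.0).
        INDArray c = Nd4j.ones(3, 5).dup('f');

        // Reference result computed before gemm mutates c: 1.0 * a x b + 1.0 * c.
        INDArray expected = a.mmul(b).add(c);

        Nd4j.gemm(a, b, c, false, false, 1.0, 1.0);

        System.out.println(c.equalsWithEps(expected, 1e-5));
    }
}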
 
Example 9
Source File: CenterLossOutputLayer.java    From deeplearning4j with Apache License 2.0
/** Returns tuple: {Gradient,Delta,Output} given preOut */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM);
    if (labels2d.size(1) != preOut.size(1)) {
        throw new DL4JInvalidInputException(
                        "Labels array numColumns (size(1) = " + labels2d.size(1) + ") does not match output layer"
                                        + " number of outputs (nOut = " + preOut.size(1) + ") " + layerId());
    }

    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

    Gradient gradient = new DefaultGradient();

    INDArray weightGradView = gradientViews.get(CenterLossParamInitializer.WEIGHT_KEY);
    INDArray biasGradView = gradientViews.get(CenterLossParamInitializer.BIAS_KEY);
    INDArray centersGradView = gradientViews.get(CenterLossParamInitializer.CENTER_KEY);

    // centers delta
    double alpha = layerConf().getAlpha();

    INDArray centers = params.get(CenterLossParamInitializer.CENTER_KEY);
    INDArray l = labels.castTo(centers.dataType()); //Ensure correct dtype (same as params); no-op if already correct dtype
    INDArray centersForExamples = l.mmul(centers);
    INDArray diff = centersForExamples.sub(input).muli(alpha);
    INDArray numerator = l.transpose().mmul(diff);
    INDArray denominator = l.sum(0).reshape(l.size(1), 1).addi(1.0);

    INDArray deltaC;
    if (layerConf().getGradientCheck()) {
        double lambda = layerConf().getLambda();
        //For gradient checks: need to multiply dLc/dcj by lambda to get dL/dcj
        deltaC = numerator.muli(lambda);
    } else {
        deltaC = numerator.diviColumnVector(denominator);
    }
    centersGradView.assign(deltaC);



    // other standard calculations
    Nd4j.gemm(input, delta, weightGradView, true, false, 1.0, 0.0); //Equivalent to:  weightGradView.assign(input.transpose().mmul(delta));
    delta.sum(biasGradView, 0); //biasGradView is initialized/zeroed first in sum op

    gradient.gradientForVariable().put(CenterLossParamInitializer.WEIGHT_KEY, weightGradView);
    gradient.gradientForVariable().put(CenterLossParamInitializer.BIAS_KEY, biasGradView);
    gradient.gradientForVariable().put(CenterLossParamInitializer.CENTER_KEY, centersGradView);

    return new Pair<>(gradient, delta);
}
 
Example 10
Source File: VariationalAutoencoder.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if (!zeroedPretrainParamGradients) {
        for (Map.Entry<String, INDArray> entry : gradientViews.entrySet()) {
            if (isPretrainParam(entry.getKey())) {
                entry.getValue().assign(0);
            }
        }
        zeroedPretrainParamGradients = true;
    }

    INDArray input = this.input.castTo(dataType);

    Gradient gradient = new DefaultGradient();

    VAEFwdHelper fwd = doForward(true, true, workspaceMgr);
    INDArray currentDelta = pzxActivationFn.backprop(fwd.pzxMeanPreOut, epsilon).getFirst();

    //Finally, calculate mean value:
    INDArray meanW = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_W, true, workspaceMgr);
    INDArray dLdMeanW = gradientViews.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W); //f order
    INDArray lastEncoderActivation = fwd.encoderActivations[fwd.encoderActivations.length - 1];
    Nd4j.gemm(lastEncoderActivation, currentDelta, dLdMeanW, true, false, 1.0, 0.0);
    INDArray dLdMeanB = gradientViews.get(VariationalAutoencoderParamInitializer.PZX_MEAN_B);
    currentDelta.sum(dLdMeanB, 0); //dLdMeanB is initialized/zeroed first in sum op

    gradient.gradientForVariable().put(VariationalAutoencoderParamInitializer.PZX_MEAN_W, dLdMeanW);
    gradient.gradientForVariable().put(VariationalAutoencoderParamInitializer.PZX_MEAN_B, dLdMeanB);

    epsilon = meanW.mmul(currentDelta.transpose()).transpose();

    int nEncoderLayers = encoderLayerSizes.length;

    IActivation afn = layerConf().getActivationFn();
    for (int i = nEncoderLayers - 1; i >= 0; i--) {
        String wKey = "e" + i + WEIGHT_KEY_SUFFIX;
        String bKey = "e" + i + BIAS_KEY_SUFFIX;

        INDArray weights = getParamWithNoise(wKey, true, workspaceMgr);

        INDArray dLdW = gradientViews.get(wKey);
        INDArray dLdB = gradientViews.get(bKey);

        INDArray preOut = fwd.encoderPreOuts[i];

        currentDelta = afn.backprop(preOut, epsilon).getFirst();

        INDArray actInput;
        if (i == 0) {
            actInput = input;
        } else {
            actInput = fwd.encoderActivations[i - 1];
        }
        Nd4j.gemm(actInput, currentDelta, dLdW, true, false, 1.0, 0.0);
        currentDelta.sum(dLdB, 0); //dLdB is initialized/zeroed first in sum op

        gradient.gradientForVariable().put(wKey, dLdW);
        gradient.gradientForVariable().put(bKey, dLdB);

        if(i == 0) {
            epsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, currentDelta.dataType(), new long[]{weights.size(0), currentDelta.size(0)}, 'f');
            weights.mmuli(currentDelta.transpose(), epsilon);
            epsilon = epsilon.transpose();
        } else {
            epsilon = weights.mmul(currentDelta.transpose()).transpose();
        }
    }

    return new Pair<>(gradient, epsilon);
}
 
Example 11
Source File: BaseLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    //If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent)
    Pair<INDArray, INDArray> zAndPreNorm = preOutputWithPreNorm(true, true, workspaceMgr);
    INDArray z = zAndPreNorm.getFirst(); //Note: preOutput(INDArray) can't be used here, as it does a setInput(input) and resets the 'appliedDropout' flag
    INDArray preNorm = zAndPreNorm.getSecond();
    INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst(); //TODO handle activation function params

    if (maskArray != null) {
        applyMask(delta);
    }

    Gradient ret = new DefaultGradient();

    if(hasBias()){
        INDArray biasGrad = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGrad, 0); //biasGrad is initialized/zeroed first
        ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGrad);
    }

    INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, true, workspaceMgr);

    INDArray epsilonNext = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, delta.dataType(), new long[]{W.size(0), delta.size(0)}, 'f');
    if(hasLayerNorm()) {
        INDArray g = getParam(DefaultParamInitializer.GAIN_KEY);

        INDArray dldg = gradientViews.get(DefaultParamInitializer.GAIN_KEY);
        Nd4j.getExecutioner().exec(new LayerNormBp(preNorm, g, delta, delta, dldg, true, 1));
        ret.gradientForVariable().put(DefaultParamInitializer.GAIN_KEY, dldg);

    }

    epsilonNext = W.mmuli(delta.transpose(),epsilonNext).transpose();   //W.mmul(delta.transpose()).transpose();

    INDArray weightGrad = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY); //f order
    Nd4j.gemm(input.castTo(weightGrad.dataType()), delta, weightGrad, true, false, 1.0, 0.0);           //TODO avoid castTo?
    ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGrad);

    weightNoiseParams.clear();

    epsilonNext = backpropDropOutIfPresent(epsilonNext);
    return new Pair<>(ret, epsilonNext);
}
 
Example 12
Source File: CustomLayerImpl.java    From deeplearning4j with Apache License 2.0
@Override
    public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
        /*
        The backprop gradient method here is very similar to the BaseLayer backprop gradient implementation.
        The only major difference is the two activation functions we have added in this example.

        Note that epsilon is dL/da - i.e., the derivative of the loss function with respect to the activations.
        It has the exact same shape as the activation arrays (i.e., the output of preOut and activate methods)
        This is NOT the 'delta' commonly used in the neural network literature; the delta is obtained from the
        epsilon ("epsilon" is dl4j's notation) by doing an element-wise product with the activation function derivative.

        Note the following:
        1. It is very important that you use the gradientViews arrays for the results.
           Note the gradientViews.get(...) and the in-place operations here.
           This is because DL4J uses a single large array for the gradients for efficiency. Subsets of this array (views)
           are distributed to each of the layers for efficient backprop and memory management.
        2. The method returns two things, as a Pair:
           (a) a Gradient object (essentially a Map<String,INDArray> of the gradients for each parameter; again, these
               are views of the full network gradient array)
           (b) an INDArray. This INDArray is the 'epsilon' to pass to the layer below. i.e., it is the gradient with
               respect to the input to this layer

        */

        INDArray activationDerivative = preOutput(true, workspaceMgr);
        int columns = activationDerivative.columns();

        INDArray firstHalf = activationDerivative.get(NDArrayIndex.all(), NDArrayIndex.interval(0, columns / 2));
        INDArray secondHalf = activationDerivative.get(NDArrayIndex.all(), NDArrayIndex.interval(columns / 2, columns));

        INDArray epsilonFirstHalf = epsilon.get(NDArrayIndex.all(), NDArrayIndex.interval(0, columns / 2));
        INDArray epsilonSecondHalf = epsilon.get(NDArrayIndex.all(), NDArrayIndex.interval(columns / 2, columns));

        IActivation activation1 = layerConf().getActivationFn();
        IActivation activation2 = ((CustomLayer) conf.getLayer()).getSecondActivationFunction();

        //IActivation backprop method modifies the 'firstHalf' and 'secondHalf' arrays in-place, to contain dL/dz
        activation1.backprop(firstHalf, epsilonFirstHalf);
        activation2.backprop(secondHalf, epsilonSecondHalf);

        //The remaining code for this method: just copy & pasted from BaseLayer.backpropGradient
//        INDArray delta = epsilon.muli(activationDerivative);
        if (maskArray != null) {
            activationDerivative.muliColumnVector(maskArray);
        }

        Gradient ret = new DefaultGradient();

        INDArray weightGrad = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);    //f order
        Nd4j.gemm(input, activationDerivative, weightGrad, true, false, 1.0, 0.0);
        INDArray biasGrad = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        biasGrad.assign(activationDerivative.sum(0));  //TODO: do this without the assign

        ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGrad);
        ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGrad);

        INDArray epsilonNext = params.get(DefaultParamInitializer.WEIGHT_KEY).mmul(activationDerivative.transpose()).transpose();

        return new Pair<>(ret, epsilonNext);
    }
 
Example 13
Source File: CrashTest.java    From nd4j with Apache License 2.0
protected void op(INDArray x, INDArray y, int i) {
    // broadcast along row & column
    INDArray row = Nd4j.ones(64);
    INDArray column = Nd4j.ones(1024, 1);

    x.addiRowVector(row);
    x.addiColumnVector(column);

    // casual scalar
    x.addi(i * 2);

    // reduction along all dimensions
    float sum = x.sumNumber().floatValue();

    // index reduction
    Nd4j.getExecutioner().exec(new IMax(x), Integer.MAX_VALUE);

    // casual transform
    Nd4j.getExecutioner().exec(new Sqrt(x, x));

    //  dup
    INDArray x1 = x.dup(x.ordering());
    INDArray x2 = x.dup(x.ordering());
    INDArray x3 = x.dup('c');
    INDArray x4 = x.dup('f');


    // vstack && hstack
    INDArray vstack = Nd4j.vstack(x, x1, x2, x3, x4);

    INDArray hstack = Nd4j.hstack(x, x1, x2, x3, x4);

    // reduce3 call
    Nd4j.getExecutioner().exec(new ManhattanDistance(x, x2));


    // flatten call
    INDArray flat = Nd4j.toFlattened(x, x1, x2, x3, x4);


    // reduction along dimension: row & column
    INDArray max_0 = x.max(0);
    INDArray max_1 = x.max(1);


    // index reduction along dimension: row & column
    INDArray imax_0 = Nd4j.argMax(x, 0);
    INDArray imax_1 = Nd4j.argMax(x, 1);


    // log-softmax, softmax & softmax derivative
    Nd4j.getExecutioner().exec(new OldSoftMax(x));
    Nd4j.getExecutioner().exec(new SoftMaxDerivative(x));
    Nd4j.getExecutioner().exec(new LogSoftMax(x));


    // BooleanIndexing
    BooleanIndexing.replaceWhere(x, 5f, Conditions.lessThan(8f));

    // assign on view
    BooleanIndexing.assignIf(x, x1, Conditions.greaterThan(-1000000000f));

    // std var along all dimensions
    float std = x.stdNumber().floatValue();

    // std var along row & col
    INDArray xStd_0 = x.std(0);
    INDArray xStd_1 = x.std(1);

    // blas call
    float dot = (float) Nd4j.getBlasWrapper().dot(x, x1);

    // mmul
    for (boolean tA : paramsA) {
        for (boolean tB : paramsB) {

            INDArray xT = tA ? x.dup() : x.dup().transpose();
            INDArray yT = tB ? y.dup() : y.dup().transpose();

            Nd4j.gemm(xT, yT, tA, tB);
        }
    }

    // specially for views, checking here without dup and rollover
    Nd4j.gemm(x, y, false, false);

    log.debug("Iteration passed: " + i);
}
 
Example 14
Source File: CrashTest.java    From deeplearning4j with Apache License 2.0
protected void op(INDArray x, INDArray y, int i) {
    // broadcast along row & column
    INDArray row = Nd4j.ones(64);
    INDArray column = Nd4j.ones(1024, 1);

    x.addiRowVector(row);
    x.addiColumnVector(column);

    // casual scalar
    x.addi(i * 2);

    // reduction along all dimensions
    float sum = x.sumNumber().floatValue();

    // index reduction
    Nd4j.getExecutioner().exec(new ArgMax(x));

    // casual transform
    Nd4j.getExecutioner().exec(new Sqrt(x, x));

    //  dup
    INDArray x1 = x.dup(x.ordering());
    INDArray x2 = x.dup(x.ordering());
    INDArray x3 = x.dup('c');
    INDArray x4 = x.dup('f');


    // vstack && hstack
    INDArray vstack = Nd4j.vstack(x, x1, x2, x3, x4);

    INDArray hstack = Nd4j.hstack(x, x1, x2, x3, x4);

    // reduce3 call
    Nd4j.getExecutioner().exec(new ManhattanDistance(x, x2));


    // flatten call
    INDArray flat = Nd4j.toFlattened(x, x1, x2, x3, x4);


    // reduction along dimension: row & column
    INDArray max_0 = x.max(0);
    INDArray max_1 = x.max(1);


    // index reduction along dimension: row & column
    INDArray imax_0 = Nd4j.argMax(x, 0);
    INDArray imax_1 = Nd4j.argMax(x, 1);


    // log-softmax & softmax
    Nd4j.getExecutioner().exec((CustomOp) new SoftMax(x));
    Nd4j.getExecutioner().exec((CustomOp) new LogSoftMax(x));


    // BooleanIndexing
    BooleanIndexing.replaceWhere(x, 5f, Conditions.lessThan(8f));

    // assign on view
    BooleanIndexing.assignIf(x, x1, Conditions.greaterThan(-1000000000f));

    // std var along all dimensions
    float std = x.stdNumber().floatValue();

    // std var along row & col
    INDArray xStd_0 = x.std(0);
    INDArray xStd_1 = x.std(1);

    // blas call
    float dot = (float) Nd4j.getBlasWrapper().dot(x, x1);

    // mmul
    for (boolean tA : paramsA) {
        for (boolean tB : paramsB) {

            INDArray xT = tA ? x.dup() : x.dup().transpose();
            INDArray yT = tB ? y.dup() : y.dup().transpose();

            Nd4j.gemm(xT, yT, tA, tB);
        }
    }

    // specially for views, checking here without dup and rollover
    Nd4j.gemm(x, y, false, false);

    log.debug("Iteration passed: " + i);
}