Java Code Examples for org.nd4j.linalg.api.ndarray.INDArray#norm2()

The following examples show how to use org.nd4j.linalg.api.ndarray.INDArray#norm2(). Each example is taken from an open source project; the source file, project, and license are noted above each listing.
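Before the project examples, here is a minimal, self-contained sketch of what norm2 computes. It is illustrative only and assumes nothing beyond ND4J being on the classpath; the class name and array values are made up for this demo.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class Norm2Demo {
    public static void main(String[] args) {
        // 2x3 matrix with rows {3, 4, 0} and {1, 2, 2}
        INDArray x = Nd4j.create(new double[][]{{3, 4, 0}, {1, 2, 2}});

        INDArray rowNorms = x.norm2(1);   // L2 norm of each row: [5.0, 3.0]
        INDArray colNorms = x.norm2(0);   // L2 norm of each column
        INDArray total = x.norm2();       // no dimensions: the whole array reduces to a scalar

        System.out.println(rowNorms);
        System.out.println(colNorms);
        System.out.println(total);
    }
}

norm2 is the square root of the sum of squared elements over the reduced dimensions; the examples below use it mainly to normalize rows or columns of activations and parameters, or as a scalar reduction in tests.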
Example 1
Source File: MinMaxNormConstraint.java    From deeplearning4j with Apache License 2.0
@Override
public void apply(INDArray param) {
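    // Compute one L2 norm per unit, reduced over the configured dimensions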
    INDArray norm = param.norm2(dimensions);
    INDArray clipped = norm.unsafeDuplication();
    CustomOp op = DynamicCustomOp.builder("clipbyvalue")
            .addInputs(clipped)
            .callInplace(true)
            .addFloatingPointArguments(min, max)
            .build();
    Nd4j.getExecutioner().exec(op);

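    // Ratio of clipped norm to original norm; epsilon guards against division by zero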
    norm.addi(epsilon);
    clipped.divi(norm);

    if(rate != 1.0){
        clipped.muli(rate).addi(norm.muli(1.0-rate));
    }

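    // Broadcast the per-unit scale factor back onto the parameters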
    Broadcast.mul(param, clipped, param, getBroadcastDims(dimensions, param.rank()) );
}
 
Example 2
Source File: L2NormalizeVertex.java    From deeplearning4j with Apache License 2.0
@Override
public INDArray doForward(boolean training, LayerWorkspaceMgr workspaceMgr) {
    if (!canDoForward())
        throw new IllegalStateException("Cannot do forward pass: inputs not set (L2NormalizeVertex " + vertexName
                        + " idx " + vertexIndex + ")");

    // L2 norm along all dimensions except 0, unless user-specified
    // x / |x|2
    INDArray x = inputs[0];
    int[] dimensions = getDimensions(x);

    INDArray xNorm2 = x.norm2(dimensions);
    Transforms.max(xNorm2, eps, false);
    try(MemoryWorkspace ws = workspaceMgr.notifyScopeBorrowed(ArrayType.ACTIVATIONS)){
        if (x.rank() == 2) {
            return x.divColumnVector(xNorm2);
        } else {
            INDArray out = Nd4j.createUninitialized(x.shape(), x.ordering());
            return Nd4j.getExecutioner().exec(new BroadcastDivOp(x, xNorm2, out, 0));
        }
    }
}
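Note the two paths above: for rank-2 input the per-example norms form a column vector, so divColumnVector suffices; higher-rank input needs an explicit BroadcastDivOp along dimension 0. Transforms.max(xNorm2, eps, false) clamps the norms to at least eps in place, so all-zero inputs do not cause a division by zero.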
 
Example 3
Source File: ReductionBpOpValidation.java    From deeplearning4j with Apache License 2.0
@Test
public void testNorm2AlongDimensionBP() {
    //dL/dIn = dL/dOut * dOut/dIn
    //       = dL/dOut * x/|x|_2

    for (boolean keepDims : new boolean[]{false, true}) {

        long[] reducedShape_0 = (keepDims ? new long[]{1, 4} : new long[]{4});
        INDArray preReduceInput = Nd4j.linspace(1, 12, 12).reshape(3, 4);
        INDArray norm2_0 = preReduceInput.norm2(0);
        INDArray dLdOut_0 = Nd4j.create(new double[]{1, 2, 3, 4}, reducedShape_0);
        INDArray dLdInExpected_0 = preReduceInput.divRowVector(norm2_0).mulRowVector(dLdOut_0);

        INDArray dLdIn = Nd4j.createUninitialized(3, 4);

        String err = OpValidation.validate(new OpTestCase(new Norm2Bp(preReduceInput, dLdOut_0, dLdIn, keepDims, 0))
                .expectedOutput(0, dLdInExpected_0));
        assertNull(err);


        long[] reducedShape_1 = (keepDims ? new long[]{3, 1} : new long[]{3});
        INDArray norm2_1 = preReduceInput.norm2(1);
        INDArray dLdOut_1 = Nd4j.create(new double[]{1, 2, 3}, reducedShape_1);
        INDArray dLdInExpected_1 = preReduceInput.divColumnVector(norm2_1).mulColumnVector(dLdOut_1);
        dLdIn = Nd4j.createUninitialized(3, 4);

        err = OpValidation.validate(new OpTestCase(new Norm2Bp(preReduceInput, dLdOut_1, dLdIn, keepDims, 1))
                .expectedOutput(0, dLdInExpected_1));

        assertNull(err);
    }
}
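The expected gradients in this test follow from d||x||_2/dx = x/||x||_2: divRowVector (for the dimension-0 reduction) and divColumnVector (for dimension 1) compute x/||x||_2, which is then scaled elementwise by the incoming dL/dOut.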
 
Example 4
Source File: LossCosineProximity.java    From deeplearning4j with Apache License 2.0
/**
 * Computes the per-example cosine proximity score.
 *
 * @param labels       labels ("y"), one row per example
 * @param preOutput    pre-activation output of the output layer
 * @param activationFn activation function to apply to preOutput
 * @param mask         optional per-example mask column vector (may be null)
 * @return array of per-example scores
 */
public INDArray scoreArray(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    if(!labels.equalShapes(preOutput)){
        Preconditions.throwEx("Labels and preOutput must have equal shapes: got shapes %s vs %s", labels.shape(), preOutput.shape());
    }
    labels = labels.castTo(preOutput.dataType());   //No-op if already correct dtype

    /*
     mean of -(y.dot(yhat) / (||y|| * ||yhat||))
     */
    INDArray postOutput = activationFn.getActivation(preOutput.dup(), true);

    INDArray yhatmag = postOutput.norm2(1);
    INDArray ymag = labels.norm2(1);
    yhatmag = Transforms.max(yhatmag, Nd4j.EPS_THRESHOLD, false);
    ymag = Transforms.max(ymag, Nd4j.EPS_THRESHOLD, false);

    INDArray scoreArr = postOutput.mul(labels);
    scoreArr.diviColumnVector(yhatmag);
    scoreArr.diviColumnVector(ymag);

    if (mask != null) {
        if (!mask.isColumnVector()) {
            //Per-output masking doesn't really make sense for cosine proximity
            throw new UnsupportedOperationException("Expected column vector mask array for LossCosineProximity."
                            + " Got mask array with shape " + Arrays.toString(mask.shape())
                            + "; per-output masking is not " + "supported for LossCosineProximity");
        }
        scoreArr.muliColumnVector(mask);
    }
    return scoreArr.muli(-1);
}
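Each row of the returned array is the negative cosine similarity between one example's activated output and its label, so minimizing this loss pushes predictions and labels toward the same direction.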
 
Example 5
Source File: OpExecutionerTestsC.java    From deeplearning4j with Apache License 2.0
@Test
public void testNorm2AlongDimension() {
    INDArray arr = Nd4j.linspace(1, 8, 8, DataType.DOUBLE).reshape(2, 4);
    INDArray arrNorm2 = arr.norm2(1);
    INDArray assertion = Nd4j.create(new double[] {5.47722558, 13.19090596});
    assertEquals(assertion, arrNorm2);
}
 
Example 6
Source File: MiscOpValidation.java    From deeplearning4j with Apache License 2.0
@Test
public void testClipByNorm0(){
    //Expected: if array.norm2(0) is less than 2.0 (the clip value), not modified
    //Otherwise: array.tad(x,0) = array.tad(x,0) * 2.0 / array.tad(x,0).norm2()

    Nd4j.getRandom().setSeed(12345);
    INDArray arr = Nd4j.rand(5,4);
    INDArray norm2_0 = arr.norm2(0);
    arr.diviRowVector(norm2_0);

    INDArray initNorm2 = Nd4j.create(new double[]{2.2, 2.1, 2.0, 1.9}, new int[]{4});     //Initial norm2s along dimension 0
    arr.muliRowVector(initNorm2);
    norm2_0 = arr.norm2(0);

    assertEquals(initNorm2, norm2_0);

    INDArray out = Nd4j.create(arr.shape());

    INDArray expNorm = Nd4j.create(new double[]{2.0, 2.0, 2.0, 1.9}, new int[]{1, 4});  //Post-clip norm2s along dimension 0
    INDArray exp = arr.divRowVector(norm2_0).muliRowVector(expNorm);

    OpTestCase op = new OpTestCase(//Clip to norm2 of 2.0, along dimension 0
            new ClipByNorm(arr, out, 2.0, 0))
            .expectedOutput(0, exp);

    assertNull(OpValidation.validate(op));
}
 
Example 7
Source File: MiscOpValidation.java    From deeplearning4j with Apache License 2.0
@Test
public void testClipByNorm1(){
    //Expected: if array.norm2(1) is less than 1.0, not modified
    //Otherwise: array.tad(x,1) = array.tad(x,1) * 1.0 / array.tad(x,1).norm2()

    Nd4j.getRandom().setSeed(12345);
    INDArray arr = Nd4j.rand(3,5);
    INDArray norm2_1 = arr.norm2(1);
    arr.diviColumnVector(norm2_1);

    norm2_1 = arr.norm2(1);
    assertEquals(Nd4j.ones(3), norm2_1);

    INDArray scale = Nd4j.create(new double[]{1.1, 1.0, 0.9}, new int[]{3,1});
    arr.muliColumnVector(scale);
    norm2_1 = arr.norm2(1);

    INDArray out = Nd4j.createUninitialized(arr.shape());

    INDArray expNorm2 = Nd4j.create(new double[]{1.0, 1.0, norm2_1.getDouble(2)}, new int[]{3,1});

    INDArray expOut = arr.divColumnVector(norm2_1).muliColumnVector(expNorm2);

    OpTestCase op = new OpTestCase(
            new ClipByNorm(arr, out, 1.0, 1))
            .expectedOutput(0, expOut);

    String err = OpValidation.validate(op);
    assertNull(err);
}
 
Example 8
Source File: LossCosineProximity.java    From deeplearning4j with Apache License 2.0
@Override
public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    if(!labels.equalShapes(preOutput)){
        Preconditions.throwEx("Labels and preOutput must have equal shapes: got shapes %s vs %s", labels.shape(), preOutput.shape());
    }
    labels = labels.castTo(preOutput.dataType());   //No-op if already correct dtype
    INDArray yhat = activationFn.getActivation(preOutput.dup(), true);
    INDArray yL2norm = labels.norm2(1);

    INDArray yhatL2norm = yhat.norm2(1);
    INDArray yhatL2normSq = yhatL2norm.mul(yhatL2norm);

    //Note: This is not really the L1 norm since I am not taking abs values
    INDArray yhatDotyL1norm = labels.mul(yhat).sum(true,1);

    INDArray dLda = labels.mulColumnVector(yhatL2normSq);
    dLda.subi(yhat.mulColumnVector(yhatDotyL1norm));

    // transform vals to avoid nans before div
    yL2norm = Transforms.max(yL2norm, Nd4j.EPS_THRESHOLD, false);
    yhatL2norm = Transforms.max(yhatL2norm, Nd4j.EPS_THRESHOLD, false);
    yhatL2normSq = Transforms.max(yhatL2normSq, Nd4j.EPS_THRESHOLD, false);

    dLda.diviColumnVector(yL2norm);
    dLda.diviColumnVector(yhatL2norm.mul(yhatL2normSq));
    dLda.muli(-1);

    //dL/dz
    INDArray gradients = activationFn.backprop(preOutput, dLda).getFirst(); //TODO loss functions with params

    if (mask != null) {
        gradients.muliColumnVector(mask);
    }

    return gradients;
}
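The dLda computation is the quotient-rule derivative d/dŷ[-(y·ŷ)/(||y|| ||ŷ||)] = -(y||ŷ||² - ŷ(y·ŷ)) / (||y|| ||ŷ||³): the numerator is assembled first, and the two diviColumnVector calls supply ||y|| and ||ŷ||·||ŷ||² = ||ŷ||³ in the denominator.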
 
Example 9
Source File: SporadicTests.java    From nd4j with Apache License 2.0
public static INDArray scoreArray(INDArray labels, INDArray preOutput) {
    INDArray yhatmag = preOutput.norm2(1);

    INDArray scoreArr = preOutput.mul(labels);
    scoreArr.diviColumnVector(yhatmag);

    return scoreArr;
}
 
Example 10
Source File: LossCosineProximity.java    From nd4j with Apache License 2.0
@Override
public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    if (labels.size(1) != preOutput.size(1)) {
        throw new IllegalArgumentException(
                        "Labels array numColumns (size(1) = " + labels.size(1) + ") does not match output layer"
                                        + " number of outputs (nOut = " + preOutput.size(1) + ") ");

    }
    INDArray yhat = activationFn.getActivation(preOutput.dup(), true);
    INDArray yL2norm = labels.norm2(1);

    INDArray yhatL2norm = yhat.norm2(1);
    INDArray yhatL2normSq = yhatL2norm.mul(yhatL2norm);

    //Note: This is not really the L1 norm since I am not taking abs values
    INDArray yhatDotyL1norm = labels.mul(yhat).sum(1);

    INDArray dLda = labels.mulColumnVector(yhatL2normSq);
    dLda.subi(yhat.mulColumnVector(yhatDotyL1norm));

    // transform vals to avoid nans before div
    yL2norm = Transforms.max(yL2norm, Nd4j.EPS_THRESHOLD, false);
    yhatL2norm = Transforms.max(yhatL2norm, Nd4j.EPS_THRESHOLD, false);
    yhatL2normSq = Transforms.max(yhatL2normSq, Nd4j.EPS_THRESHOLD, false);

    dLda.diviColumnVector(yL2norm);
    dLda.diviColumnVector(yhatL2norm.mul(yhatL2normSq));
    dLda.muli(-1);

    //dL/dz
    INDArray gradients = activationFn.backprop(preOutput, dLda).getFirst(); //TODO loss functions with params

    if (mask != null) {
        gradients.muliColumnVector(mask);
    }

    return gradients;
}
 
Example 11
Source File: LossCosineProximity.java    From nd4j with Apache License 2.0
/**
 * Computes the per-example cosine proximity score.
 *
 * @param labels       labels ("y"), one row per example
 * @param preOutput    pre-activation output of the output layer
 * @param activationFn activation function to apply to preOutput
 * @param mask         optional per-example mask column vector (may be null)
 * @return array of per-example scores
 */
public INDArray scoreArray(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    if (labels.size(1) != preOutput.size(1)) {
        throw new IllegalArgumentException(
                        "Labels array numColumns (size(1) = " + labels.size(1) + ") does not match output layer"
                                        + " number of outputs (nOut = " + preOutput.size(1) + ") ");

    }
    /*
     mean of -(y.dot(yhat) / (||y|| * ||yhat||))
     */
    //INDArray postOutput = Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(activationFn, preOutput.dup()));
    INDArray postOutput = activationFn.getActivation(preOutput.dup(), true);

    INDArray yhatmag = postOutput.norm2(1);
    INDArray ymag = labels.norm2(1);
    yhatmag = Transforms.max(yhatmag, Nd4j.EPS_THRESHOLD, false);
    ymag = Transforms.max(ymag, Nd4j.EPS_THRESHOLD, false);

    INDArray scoreArr = postOutput.mul(labels);
    scoreArr.diviColumnVector(yhatmag);
    scoreArr.diviColumnVector(ymag);

    if (mask != null) {
        if (!mask.isColumnVector()) {
            //Per-output masking doesn't really make sense for cosine proximity
            throw new UnsupportedOperationException("Expected column vector mask array for LossCosineProximity."
                            + " Got mask array with shape " + Arrays.toString(mask.shape())
                            + "; per-output masking is not " + "supported for LossCosineProximity");
        }
        scoreArr.muliColumnVector(mask);
    }
    return scoreArr.muli(-1);
}
 
Example 12
Source File: OpExecutionerTestsC.java    From nd4j with Apache License 2.0
@Test
public void testNorm2AlongDimension() {
    INDArray arr = Nd4j.linspace(1, 8, 8).reshape(2, 4);
    INDArray arrNorm2 = arr.norm2(1);
    INDArray assertion = Nd4j.create(new double[] {5.47722558, 13.19090596});
    assertEquals(assertion, arrNorm2);
}
 
Example 13
Source File: NativeOpExecutionerTest.java    From nd4j with Apache License 2.0
@Test
public void testDebugEdgeCase2(){
    DataTypeUtil.setDTypeForContext(DataBuffer.Type.DOUBLE);
    INDArray l1 = Nd4j.create(new double[]{-0.2585039112684677,-0.005179485353710878,0.4348343401770497,0.020356532375728764,-0.1970793298488186});
    INDArray l2 = Nd4j.create(2,l1.size(1));

    INDArray p1 = Nd4j.create(new double[]{1.3979850406519119,0.6169451410155852,1.128993957530918,0.21000426084450596,0.3171215178932696});
    INDArray p2 = Nd4j.create(2, p1.size(1));

    for( int i=0; i<2; i++ ){
        l2.putRow(i, l1);
        p2.putRow(i, p1);
    }

    INDArray norm2_1 = l1.norm2(1);
    INDArray temp1 = p1.mul(l1);
    INDArray out1 = temp1.diviColumnVector(norm2_1);

    INDArray norm2_2 = l2.norm2(1);
    INDArray temp2 = p2.mul(l2);
    INDArray out2 = temp2.diviColumnVector(norm2_2);

    System.out.println("norm2_1: " + Arrays.toString(norm2_1.data().asDouble()));
    System.out.println("norm2_2: " + Arrays.toString(norm2_2.data().asDouble()));

    System.out.println("temp1: " + Arrays.toString(temp1.data().asDouble()));
    System.out.println("temp2: " + Arrays.toString(temp2.data().asDouble()));

    //Outputs here should be identical:
    System.out.println(Arrays.toString(out1.data().asDouble()));
    System.out.println(Arrays.toString(out2.getRow(0).dup().data().asDouble()));
}
 
Example 14
Source File: NativeOpExecutionerTest.java    From nd4j with Apache License 2.0
public static INDArray scoreArray(INDArray labels, INDArray preOutput) {
    INDArray yhatmag = preOutput.norm2(1);

    INDArray scoreArr = preOutput.mul(labels);
    scoreArr.diviColumnVector(yhatmag);

    return scoreArr;
}
 
Example 15
Source File: MaxNormConstraint.java    From deeplearning4j with Apache License 2.0
@Override
public void apply(INDArray param){
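    // Cap each unit's L2 norm at maxNorm, then rescale the parameters by clipped/norm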
    INDArray norm = param.norm2(dimensions);
    INDArray clipped = norm.unsafeDuplication();
    BooleanIndexing.replaceWhere(clipped, maxNorm, Conditions.greaterThan(maxNorm));
    norm.addi(epsilon);
    clipped.divi(norm);

    Broadcast.mul(param, clipped, param, getBroadcastDims(dimensions, param.rank()) );
}
 
Example 16
Source File: UnitNormConstraint.java    From deeplearning4j with Apache License 2.0
@Override
public void apply(INDArray param) {
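    // Divide each unit's parameters by its L2 norm, leaving every unit with unit norm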
    INDArray norm2 = param.norm2(dimensions);
    Broadcast.div(param, norm2, param, getBroadcastDims(dimensions, param.rank()) );
}
 
Example 17
Source File: ReductionOpValidation.java    From deeplearning4j with Apache License 2.0
@Test
public void testMultiHeadedDotProductAttentionWeirdInputs(){
    final INDArray k = Nd4j.rand(new int[]{10, 4, 5});
    final INDArray v = Nd4j.rand(new int[]{10, 4, 5});
    final INDArray q = Nd4j.rand(new int[]{10, 4, 2});

    final INDArray Wk = Nd4j.rand(new int[]{2, 3, 4});
    final INDArray Wv = Nd4j.rand(new int[]{2, 3, 4});
    final INDArray Wq = Nd4j.rand(new int[]{2, 3, 4});
    final INDArray Wo = Nd4j.rand(new int[]{2* 3, 8});

    final INDArray mask = Nd4j.rand(10, 5).gte(0.2).castTo(DataType.DOUBLE);

    final INDArray kP = Nd4j.tensorMmul(k, Wk, new int[][]{{1}, {2}}).permutei(0, 2, 3, 1);
    final INDArray vP = Nd4j.tensorMmul(v, Wv, new int[][]{{1}, {2}}).permutei(0, 2, 3, 1);
    final INDArray qP = Nd4j.tensorMmul(q, Wq, new int[][]{{1}, {2}}).permutei(0, 2, 3, 1);

    final DynamicCustomOp dot_product_attention = DynamicCustomOp
            .builder("dot_product_attention")
            .addInputs(qP, kP, vP, mask)
            .addIntegerArguments(1, 0)
            .build();

    final INDArray[] outputs = Nd4j.exec(dot_product_attention);
    final INDArray attOut = outputs[0].permutei(0, 3, 1, 2).reshape(k.size(0), q.size(2), Wv.size(0) * Wv.size(1));

    final INDArray out = Nd4j.tensorMmul(attOut, Wo, new int[][]{{2}, {0}}).permutei(0, 2, 1);
    final INDArray finalOut = out.norm2();

    for (char orderWeights: new char[]{'f', 'c'}){
        for (char orderInput: new char[]{'f', 'c'}){
            log.info("-*- Starting Test: input Order = {}, weightOrder = {} -*-", orderInput, orderWeights);


            SameDiff sd = SameDiff.create();
            SDVariable sdQ = sd.var("q", q.dup(orderInput));
            SDVariable sdK = sd.var("k", k.dup(orderInput));
            SDVariable sdV = sd.var("v", v.dup(orderInput));
            SDVariable sdWq = sd.var("Wq", Wq.dup(orderWeights));
            SDVariable sdWk = sd.var("Wk", Wk.dup(orderWeights));
            SDVariable sdWv = sd.var("Wv", Wv.dup(orderWeights));
            SDVariable sdWo = sd.var("Wo", Wo.dup(orderWeights));
            SDVariable sdMask = sd.constant("mask", mask);


            SDVariable t = sd.nn.multiHeadDotProductAttention(sdQ, sdK, sdV, sdWq, sdWk, sdWv, sdWo, sdMask, true);
            t.norm2("out");

            String err = OpValidation.validate(new TestCase(sd)
                    .expectedOutput("out", finalOut)
                    .gradientCheck(false)
                    .gradCheckSkipVariables("mask"));

            assertNull(err);
        }
    }
}
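Calling norm2() with no dimension arguments reduces the whole output tensor to a single scalar, which gives the SameDiff graph one well-defined output ("out") to validate against finalOut across the different input and weight orderings.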
 
Example 18
Source File: MiscOpValidation.java    From deeplearning4j with Apache License 2.0
@Test
public void testClipByNorm2(){
    //Expected: if array.norm2(1) is less than 1.0, not modified
    //Otherwise: array.tad(x,1) = array.tad(x,1) * 1.0 / array.tad(x,1).norm2()

    Nd4j.getRandom().setSeed(12345);
    INDArray arr = Nd4j.rand(3,5);
    INDArray norm2_1 = arr.norm2(1);
    arr.diviColumnVector(norm2_1);

    norm2_1 = arr.norm2(1);
    assertEquals(Nd4j.ones(3), norm2_1);

    INDArray scale = Nd4j.create(new double[]{1.1, 1.0, 0.9}, new int[]{3,1});
    arr.muliColumnVector(scale);
    norm2_1 = arr.norm2(1);

    INDArray out = Nd4j.createUninitialized(arr.shape());

    OpTestCase op = new OpTestCase(DynamicCustomOp.builder("clipbynorm")
            .addInputs(arr)
            .addOutputs(out)
            .addIntegerArguments(1)
            .addFloatingPointArguments(1.0)
            .build());

    INDArray expNorm2 = Nd4j.create(new double[]{1.0, 1.0, norm2_1.getDouble(2)}, new int[]{3,1});

    INDArray expOut = arr.divColumnVector(norm2_1).muliColumnVector(expNorm2);
    op.expectedOutput(0, expOut);

    System.out.println("Input");
    System.out.println(arr.shapeInfoToString());
    System.out.println(Arrays.toString(arr.data().asFloat()));

    System.out.println("Expected");
    System.out.println(expOut.shapeInfoToString());
    System.out.println(Arrays.toString(expOut.data().asFloat()));

    String err = OpValidation.validate(op);
    assertNull(err);
}
 
Example 19
Source File: ReductionOpValidation.java    From deeplearning4j with Apache License 2.0
@Test
public void testMultiHeadedDotProductAttention(){
    final INDArray k = Nd4j.rand(new int[]{10, 4, 5});
    final INDArray v = Nd4j.rand(new int[]{10, 4, 5});
    final INDArray q = Nd4j.rand(new int[]{10, 4, 2});

    final INDArray Wk = Nd4j.rand(new int[]{2, 3, 4});
    final INDArray Wv = Nd4j.rand(new int[]{2, 3, 4});
    final INDArray Wq = Nd4j.rand(new int[]{2, 3, 4});
    final INDArray Wo = Nd4j.rand(new int[]{2* 3, 8});

    final INDArray kP = Nd4j.tensorMmul(k, Wk, new int[][]{{1}, {2}}).permutei(0, 2, 3, 1);
    final INDArray vP = Nd4j.tensorMmul(v, Wv, new int[][]{{1}, {2}}).permutei(0, 2, 3, 1);
    final INDArray qP = Nd4j.tensorMmul(q, Wq, new int[][]{{1}, {2}}).permutei(0, 2, 3, 1);

    final INDArray mask = Nd4j.rand(10, 5).gte(0.2).castTo(DataType.DOUBLE);

    final DynamicCustomOp dot_product_attention = DynamicCustomOp
            .builder("dot_product_attention")
            .addInputs(qP, kP, vP, mask)
            .addIntegerArguments(1, 0)
            .build();

    final INDArray[] outputs = Nd4j.exec(dot_product_attention);
    final INDArray attOut = outputs[0].permutei(0, 3, 1, 2).reshape(k.size(0), q.size(2), Wv.size(0) * Wv.size(1));

    final INDArray out = Nd4j.tensorMmul(attOut, Wo, new int[][]{{2}, {0}}).permutei(0, 2, 1);
    final INDArray finalOut = out.norm2();

    SameDiff sd = SameDiff.create();
    SDVariable sdQ = sd.var("q", q);
    SDVariable sdK = sd.var("k", k);
    SDVariable sdV = sd.var("v", v);
    SDVariable sdWq = sd.var("Wq", Wq);
    SDVariable sdWk = sd.var("Wk", Wk);
    SDVariable sdWv = sd.var("Wv", Wv);
    SDVariable sdWo = sd.var("Wo", Wo);
    SDVariable sdMask = sd.constant("mask", mask);


    SDVariable t = sd.nn.multiHeadDotProductAttention(sdQ, sdK, sdV, sdWq, sdWk, sdWv, sdWo, sdMask, true);
    t.norm2("out");

    String err = OpValidation.validate(new TestCase(sd)
            .expectedOutput("out", finalOut)
            .gradientCheck(true)
            .gradCheckSkipVariables("mask"));

    assertNull(err);
}
 
Example 20
Source File: SporadicTests.java    From nd4j with Apache License 2.0
@Test
public void testDebugEdgeCase2(){
    DataTypeUtil.setDTypeForContext(DataBuffer.Type.DOUBLE);
    INDArray l1 = Nd4j.create(new double[]{-0.2585039112684677,-0.005179485353710878,0.4348343401770497,0.020356532375728764,-0.1970793298488186});
    INDArray l2 = Nd4j.create(2,l1.size(1));

    INDArray p1 = Nd4j.create(new double[]{1.3979850406519119,0.6169451410155852,1.128993957530918,0.21000426084450596,0.3171215178932696});
    INDArray p2 = Nd4j.create(2, p1.size(1));

    for( int i=0; i<2; i++ ){
        l2.putRow(i, l1);
        p2.putRow(i, p1);
    }

    INDArray norm2_1 = l1.norm2(1);
    System.out.println("Queue: " + ((CudaGridExecutioner) Nd4j.getExecutioner()).getQueueLength());

    INDArray temp1 = p1.mul(l1);

    System.out.println("Queue: " + ((CudaGridExecutioner) Nd4j.getExecutioner()).getQueueLength());

    //if (Nd4j.getExecutioner() instanceof CudaGridExecutioner)
    //    ((CudaGridExecutioner) Nd4j.getExecutioner()).flushQueueBlocking();

    INDArray out1 = temp1.diviColumnVector(norm2_1);
    System.out.println("------");

    Nd4j.getExecutioner().commit();

    INDArray norm2_2 = l2.norm2(1);

    System.out.println("norm2_1: " + Arrays.toString(norm2_1.data().asDouble()));
    System.out.println("norm2_2: " + Arrays.toString(norm2_2.data().asDouble()));

    INDArray temp2 = p2.mul(l2);

    System.out.println("temp1: " + Arrays.toString(temp1.data().asDouble()));
    System.out.println("temp2: " + Arrays.toString(temp2.data().asDouble()));

    INDArray out2 = temp2.diviColumnVector(norm2_2);

    //Outputs here should be identical:
    System.out.println(Arrays.toString(out1.data().asDouble()));
    System.out.println(Arrays.toString(out2.getRow(0).dup().data().asDouble()));
}