Java Code Examples for org.nd4j.linalg.api.ndarray.INDArray#putColumn()

The following examples show how to use org.nd4j.linalg.api.ndarray.INDArray#putColumn(), which copies the values of a vector into the specified column of a matrix. Each example is taken from an open-source project; the source file and license are noted above each listing.
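
Before the project examples, here is a minimal standalone sketch of the method's basic contract: putColumn(i, v) copies the values of the vector v into column i of the receiver. The class name below is ours; on recent ND4J versions it helps to create both arrays with the same data type.

import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class PutColumnBasics {
    public static void main(String[] args) {
        INDArray m = Nd4j.zeros(DataType.DOUBLE, 3, 2);     // 3x2 matrix of zeros
        INDArray col = Nd4j.create(new double[]{1, 2, 3});  // length-3 vector
        m.putColumn(0, col);                                // copy the vector into column 0
        System.out.println(m);                              // column 0 now holds 1, 2, 3
    }
}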
Example 1
Source File: PCA.java    From deeplearning4j with Apache License 2.0
/**
 * Return a reduced basis set that covers a certain fraction of the variance of the data
 * @param variance The desired fractional variance (0 to 1); the variance covered by the returned basis will always be at least this value.
 * @return The basis vectors as columns, size <i>N</i> rows by <i>ndims</i> columns, where <i>ndims</i> is less than or equal to <i>N</i>
 */
public INDArray reducedBasis(double variance) {
    INDArray vars = Transforms.pow(eigenvalues, -0.5, true);
    double res = vars.sumNumber().doubleValue();
    double total = 0.0;
    int ndims = 0;
    for (int i = 0; i < vars.columns(); i++) {
        ndims++;
        total += vars.getDouble(i);
        if (total / res > variance)
            break;
    }
    INDArray result = Nd4j.create(eigenvectors.rows(), ndims);
    for (int i = 0; i < ndims; i++)
        result.putColumn(i, eigenvectors.getColumn(i));
    return result;
}
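
A usage sketch for reducedBasis(), assuming the surrounding class is org.nd4j.linalg.dimensionalityreduction.PCA with its dataset constructor (an assumption; check your ND4J version):

INDArray data = Nd4j.rand(DataType.DOUBLE, 100, 10);  // 100 samples, 10 features
PCA pca = new PCA(data);                              // computes covariance, eigenvectors, eigenvalues
INDArray basis = pca.reducedBasis(0.95);              // columns covering at least 95% of the variance
INDArray projected = data.mmul(basis);                // project the data onto the reduced basis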
 
Example 2
Source File: NormalizerStandardizeLabelsTest.java    From nd4j with Apache License 2.0
public genRandomDataSet(int nSamples, int nFeatures, int a, int b, long randSeed) {
    /* If a = 1 and b = 0, the features follow a standard normal distribution;
       otherwise they get a random scale (aA) and a random offset (bB).
     */
    int i = 0;
    // Randomly generate scaling constants and add offsets
    // to get aA and bB
    INDArray aA = a == 1 ? Nd4j.ones(1, nFeatures) : Nd4j.rand(1, nFeatures, randSeed).mul(a); //a = 1, don't scale
    INDArray bB = Nd4j.rand(1, nFeatures, randSeed).mul(b); //b = 0 this zeros out
    // transform ndarray as X = aA * X + bB
    INDArray randomFeatures = Nd4j.zeros(nSamples, nFeatures);
    while (i < nFeatures) {
        INDArray randomSlice = Nd4j.randn(nSamples, 1, randSeed);
        randomSlice.muli(aA.getScalar(0, i));
        randomSlice.addi(bB.getScalar(0, i));
        randomFeatures.putColumn(i, randomSlice);
        i++;
    }
    INDArray randomLabels = randomFeatures.dup();
    this.sampleDataSet = new DataSet(randomFeatures, randomLabels);
    this.theoreticalMean = bB.dup();
    this.theoreticalStd = aA.dup();
    this.theoreticalSEM = this.theoreticalStd.div(Math.sqrt(nSamples));
}
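
Why the constructor stores bB as the theoretical mean and aA as the theoretical standard deviation: for Z ~ N(0, 1), a*Z + b has mean b and standard deviation |a|. A quick empirical check (a sketch, not part of the test):

INDArray z = Nd4j.randn(100000, 1, 42);  // standard normal samples, seed 42
INDArray x = z.mul(2.5).add(7.0);        // a = 2.5, b = 7.0
System.out.println(x.meanNumber());      // approximately 7.0
System.out.println(x.stdNumber());       // approximately 2.5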
 
Example 3
Source File: NormalizerStandardizeLabelsTest.java    From deeplearning4j with Apache License 2.0
public genRandomDataSet(int nSamples, int nFeatures, int a, int b, long randSeed) {
    /* If a = 1 and b = 0, the features follow a standard normal distribution;
       otherwise they get a random scale (aA) and a random offset (bB).
     */
    int i = 0;
    // Randomly generate scaling constants and add offsets
    // to get aA and bB
    INDArray aA = a == 1 ? Nd4j.ones(1, nFeatures) : Nd4j.rand(new int[]{1, nFeatures}, randSeed).mul(a); //a = 1, don't scale
    INDArray bB = Nd4j.rand(new int[]{1, nFeatures}, randSeed).mul(b); //b = 0 this zeros out
    // transform ndarray as X = aA * X + bB
    INDArray randomFeatures = Nd4j.zeros(nSamples, nFeatures);
    while (i < nFeatures) {
        INDArray randomSlice = Nd4j.randn(randSeed, new long[]{nSamples, 1});
        randomSlice.muli(aA.getScalar(0, i));
        randomSlice.addi(bB.getScalar(0, i));
        randomFeatures.putColumn(i, randomSlice);
        i++;
    }
    INDArray randomLabels = randomFeatures.dup();
    this.sampleDataSet = new DataSet(randomFeatures, randomLabels);
    this.theoreticalMean = bB.dup();
    this.theoreticalStd = aA.dup();
    this.theoreticalSEM = this.theoreticalStd.div(Math.sqrt(nSamples));
}
 
Example 4
Source File: ReductionBpOpValidation.java    From deeplearning4j with Apache License 2.0
@Test
public void testMeanAlongDim1BP() {
    //Reduction along dimension
    //Inputs/outputs as before - but note that the output is no longer a scalar

    //Note: when reducing [3,4] along dimension 1 -> 3 TADs of length 4 -> N=4 -> dL/dIn_i = dL/dOut * 1/4
    //We have one epsilon/gradient for each of the 3 TADs -> dL/dOut length is 3

    for (boolean keepDims : new boolean[]{false, true}) {
        INDArray preReduceInput = Nd4j.linspace(1, 12, 12).reshape(3, 4);

        long[] reducedShape_1 = (keepDims ? new long[]{3, 1} : new long[]{3});
        INDArray dLdOut_1 = Nd4j.create(new double[]{1, 2, 3}, reducedShape_1);
        INDArray dLdInExpected_1 = Nd4j.createUninitialized(preReduceInput.shape());
        for (int i = 0; i < 4; i++) {
            dLdInExpected_1.putColumn(i, dLdOut_1.div(4));
        }

        INDArray dLdIn = Nd4j.createUninitialized(3, 4);

        String err = OpValidation.validate(new OpTestCase(new MeanBp(preReduceInput, dLdOut_1, dLdIn, keepDims, 1))
                .expectedOutput(0, dLdInExpected_1));

        assertNull(err);
    }
}
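
The loop above tiles dL/dOut * 1/4 across the four columns; the sum reduction in the next example follows the same pattern without the 1/4 factor, since d(sum)/d(in_i) = 1. An equivalent broadcast-based construction of the same expected gradient (a sketch using standard INDArray API):

INDArray dLdOut = Nd4j.create(new double[]{1, 2, 3}, new long[]{3, 1});
INDArray dLdInExpected = dLdOut.div(4).broadcast(3, 4);  // row i holds dL/dOut_i / 4 in every column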
 
Example 5
Source File: ReductionBpOpValidation.java    From deeplearning4j with Apache License 2.0
@Test
public void testReduceSumAlongDim1BP() {
    //Reduction along dimension
    //Inputs/outputs as before - but note that the output is no longer a scalar

    //Note: when reducing [3,4] along dimension 1 -> 3 TADs of length 4
    //We have one epsilon/gradient for each of the 3 TADs -> dL/dOut length is 3

    for (boolean keepDims : new boolean[]{false, true}) {
        INDArray preReduceInput = Nd4j.linspace(1, 12, 12).reshape(3, 4);

        long[] reducedShape_1 = (keepDims ? new long[]{3, 1} : new long[]{3});
        INDArray dLdOut_1 = Nd4j.create(new double[]{1, 2, 3}, reducedShape_1);
        INDArray dLdInExpected_1 = Nd4j.createUninitialized(preReduceInput.shape());
        for (int i = 0; i < 4; i++) {
            dLdInExpected_1.putColumn(i, dLdOut_1);
        }

        INDArray dLdIn = Nd4j.createUninitialized(3, 4);

        String err = OpValidation.validate(new OpTestCase(new SumBp(preReduceInput, dLdOut_1, dLdIn, keepDims, 1))
                .expectedOutput(0, dLdInExpected_1));

        assertNull(err);
    }
}
 
Example 6
Source File: PCA.java    From deeplearning4j with Apache License 2.0
/**
 * This method performs a dimensionality reduction, including principal components
 * that cover a fraction of the total variance of the system.  All calculations are
 * performed about the mean.
 * @param in A matrix of data points as rows, where the columns are the N features
 * @param variance The desired fraction of the total variance required
 * @return The reduced basis set
 */
public static INDArray pca2(INDArray in, double variance) {
    // let's calculate the covariance and the mean
    INDArray[] covmean = covarianceMatrix(in);
    // use the covariance matrix (inverse) to find "force constants" and then break into orthonormal
    // unit vector components
    INDArray[] pce = principalComponents(covmean[0]);
    // calculate the variance of each component
    INDArray vars = Transforms.pow(pce[1], -0.5, true);
    double res = vars.sumNumber().doubleValue();
    double total = 0.0;
    int ndims = 0;
    for (int i = 0; i < vars.columns(); i++) {
        ndims++;
        total += vars.getDouble(i);
        if (total / res > variance)
            break;
    }
    INDArray result = Nd4j.create(in.columns(), ndims);
    for (int i = 0; i < ndims; i++)
        result.putColumn(i, pce[0].getColumn(i));
    return result;
}
 
Example 7
Source File: PCA.java    From nd4j with Apache License 2.0
/**
 * This method performs a dimensionality reduction, including principal components
 * that cover a fraction of the total variance of the system.  All calculations are
 * performed about the mean.
 * @param in A matrix of data points as rows, where the columns are the N features
 * @param variance The desired fraction of the total variance required
 * @return The reduced basis set
 */
public static INDArray pca2(INDArray in, double variance) {
    // let's calculate the covariance and the mean
    INDArray[] covmean = covarianceMatrix(in);
    // use the covariance matrix (inverse) to find "force constants" and then break into orthonormal
    // unit vector components
    INDArray[] pce = principalComponents(covmean[0]);
    // calculate the variance of each component
    INDArray vars = Transforms.pow(pce[1], -0.5, true);
    double res = vars.sumNumber().doubleValue();
    double total = 0.0;
    int ndims = 0;
    for (int i = 0; i < vars.columns(); i++) {
        ndims++;
        total += vars.getDouble(i);
        if (total / res > variance)
            break;
    }
    INDArray result = Nd4j.create(in.columns(), ndims);
    for (int i = 0; i < ndims; i++)
        result.putColumn(i, pce[0].getColumn(i));
    return result;
}
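
A usage sketch for pca2() as shown above (the data matrix and variance threshold are illustrative):

INDArray X = Nd4j.rand(DataType.DOUBLE, 200, 8);  // 200 data points, 8 features
INDArray basis = PCA.pca2(X, 0.90);               // basis covering at least 90% of the variance
INDArray Xreduced = X.mmul(basis);                // 200 x ndims projection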
 
Example 8
Source File: BaseLapack.java    From deeplearning4j with Apache License 2.0
@Override
public INDArray getPFactor(int M, INDArray ipiv) {
    // The simplest permutation is the identity matrix
    INDArray P = Nd4j.eye(M); // result is a square matrix with given size
    for (int i = 0; i < ipiv.length(); i++) {
        int pivot = ipiv.getInt(i) - 1; // Did we swap row #i with anything?
        if (pivot > i) { // don't reswap when we get lower down in the vector
            INDArray v1 = P.getColumn(i).dup(); // because of row vs col major order we'll ...
            INDArray v2 = P.getColumn(pivot); // ... make a transposed matrix immediately
            P.putColumn(i, v2);
            P.putColumn(pivot, v1); // note dup() above is required - getColumn() is a 'view'
        }
    }
    return P; // the permutation matrix - contains a single 1 in any row and column
}
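
A worked sketch of the pivot logic above. The ipiv vector here is hand-made for illustration; in practice it comes from an LU factorization (LAPACK getrf) and uses 1-based indexing:

INDArray ipiv = Nd4j.create(new double[]{2, 2, 3});  // row 0 was swapped with row 1; rows 1 and 2 stayed
INDArray P = Nd4j.getBlasWrapper().lapack().getPFactor(3, ipiv);
// P is the identity with columns 0 and 1 exchanged:
// [[0, 1, 0],
//  [1, 0, 0],
//  [0, 0, 1]]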
 
Example 9
Source File: PCA.java    From deeplearning4j with Apache License 2.0
/**
 * Calculates the PCA factors of a matrix for a given number of reduced features,
 * and returns the factors used to scale observations.
 *
 * The return is a factor matrix to reduce (normalized) feature sets
 *
 * @see pca(INDArray, int, boolean)
 *
 * @param A the array of features, rows are results, columns are features - will be changed
 * @param nDims the number of components on which to project the features 
 * @param normalize whether to normalize (adjust each feature to have zero mean)
 * @return the reduced feature set
 */
public static INDArray pca_factor(INDArray A, int nDims, boolean normalize) {

    if (normalize) {
        // Normalize to mean 0 for each feature ( each column has 0 mean )
        INDArray mean = A.mean(0);
        A.subiRowVector(mean);
    }

    long m = A.rows();
    long n = A.columns();

    // Prepare the SVD results; we'll decompose A into U x S x V'
    INDArray s = Nd4j.create(A.dataType(), m < n ? m : n);
    INDArray VT = Nd4j.create(A.dataType(), new long[]{n, n}, 'f');

    // Note - we don't care about U 
    Nd4j.getBlasWrapper().lapack().gesvd(A, s, null, VT);

    // for comparison k & nDims are the equivalent values in both methods implementing PCA

    // So now let's rip out the appropriate number of right singular vectors from
    // the V output (pulling columns of V, i.e. rows of VT, since VT is the transpose of V)
    INDArray V = VT.transpose();
    INDArray factor = Nd4j.create(A.dataType(),new long[]{n, nDims}, 'f');
    for (int i = 0; i < nDims; i++) {
        factor.putColumn(i, V.getColumn(i));
    }

    return factor;
}
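
Usage sketch; note the javadoc warning that the input matrix is modified in place, so pass a copy if you still need A afterwards:

INDArray A = Nd4j.rand(DataType.DOUBLE, 100, 6);           // 100 observations, 6 features
INDArray factor = PCA.pca_factor(A.dup(), 2, true);        // keep 2 components, normalize first
INDArray reduced = A.subRowVector(A.mean(0)).mmul(factor); // center, then project: 100 x 2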
 
Example 10
Source File: BaseLapack.java    From nd4j with Apache License 2.0
@Override
public INDArray getPFactor(int M, INDArray ipiv) {
    // The simplest permutation is the identity matrix
    INDArray P = Nd4j.eye(M); // result is a square matrix with given size
    for (int i = 0; i < ipiv.length(); i++) {
        int pivot = ipiv.getInt(i) - 1; // Did we swap row #i with anything?
        if (pivot > i) { // don't reswap when we get lower down in the vector
            INDArray v1 = P.getColumn(i).dup(); // because of row vs col major order we'll ...
            INDArray v2 = P.getColumn(pivot); // ... make a transposed matrix immediately
            P.putColumn(i, v2);
            P.putColumn(pivot, v1); // note dup() above is required - getColumn() is a 'view'
        }
    }
    return P; // the permutation matrix - contains a single 1 in any row and column
}
 
Example 11
Source File: GravesBidirectionalLSTMTest.java    From deeplearning4j with Apache License 2.0
static private void reverseColumnsInPlace(final INDArray x) {
    final long N = x.size(1);
    final INDArray x2 = x.dup();

    for (int t = 0; t < N; t++) {
        final long b = N - t - 1;
        //clone?
        x.putColumn(t, x2.getColumn(b));
    }
}
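
For a concrete picture of what this helper does (sketch):

INDArray x = Nd4j.create(new double[][]{{1, 2, 3}, {4, 5, 6}});
reverseColumnsInPlace(x);
// x is now:
// [[3, 2, 1],
//  [6, 5, 4]]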
 
Example 12
Source File: NDArrayTestsFortran.java    From nd4j with Apache License 2.0
@Test
public void testColumns() {
    INDArray arr = Nd4j.create(new long[] {3, 2});
    INDArray column2 = arr.getColumn(0);
    //assertEquals(true, Shape.shapeEquals(new long[]{3, 1}, column2.shape()));
    INDArray column = Nd4j.create(new double[] {1, 2, 3}, new long[] {1, 3});
    arr.putColumn(0, column);

    INDArray firstColumn = arr.getColumn(0);

    assertEquals(column, firstColumn);


    INDArray column1 = Nd4j.create(new double[] {4, 5, 6}, new long[] {1, 3});
    arr.putColumn(1, column1);
    INDArray testRow1 = arr.getColumn(1);
    assertEquals(column1, testRow1);


    INDArray evenArr = Nd4j.create(new double[] {1, 2, 3, 4}, new long[] {2, 2});
    INDArray put = Nd4j.create(new double[] {5, 6}, new long[] {1, 2});
    evenArr.putColumn(1, put);
    INDArray testColumn = evenArr.getColumn(1);
    assertEquals(put, testColumn);


    INDArray n = Nd4j.create(Nd4j.linspace(1, 4, 4).data(), new long[] {2, 2});
    INDArray column23 = n.getColumn(0);
    INDArray column12 = Nd4j.create(new double[] {1, 2}, new long[] {1, 2});
    assertEquals(column23, column12);


    INDArray column0 = n.getColumn(1);
    INDArray column01 = Nd4j.create(new double[] {3, 4}, new long[] {1, 2});
    assertEquals(column0, column01);


}
 
Example 13
Source File: NormalizerStandardizeTest.java    From nd4j with Apache License 2.0
public genRandomDataSet(int nSamples, int nFeatures, int a, int b, long randSeed) {
    /* If a = 1 and b = 0, the features follow a standard normal distribution;
       otherwise they get a random scale (aA) and a random offset (bB).
     */
    int i = 0;
    // Randomly generate scaling constants and add offsets
    // to get aA and bB
    INDArray aA = a == 1 ? Nd4j.ones(1, nFeatures) : Nd4j.rand(1, nFeatures, randSeed).mul(a); //a = 1, don't scale
    INDArray bB = Nd4j.rand(1, nFeatures, randSeed).mul(b); //b = 0 this zeros out
    // transform ndarray as X = aA * X + bB
    INDArray randomFeatures = Nd4j.zeros(nSamples, nFeatures);
    INDArray randomFeaturesTransform = Nd4j.zeros(nSamples, nFeatures);
    while (i < nFeatures) {
        INDArray randomSlice = Nd4j.randn(nSamples, 1, randSeed);
        randomFeaturesTransform.putColumn(i, randomSlice);
        randomSlice.muli(aA.getScalar(0, i));
        randomSlice.addi(bB.getScalar(0, i));
        randomFeatures.putColumn(i, randomSlice);
        i++;
    }
    INDArray randomLabels = Nd4j.zeros(nSamples, 1);
    this.sampleDataSet = new DataSet(randomFeatures, randomLabels);
    this.theoreticalTransform = new DataSet(randomFeaturesTransform, randomLabels);
    this.theoreticalMean = bB;
    this.theoreticalStd = aA;
    this.theoreticalSEM = this.theoreticalStd.div(Math.sqrt(nSamples));
}
 
Example 14
Source File: VpTreeNodeTest.java    From deeplearning4j with Apache License 2.0
public static INDArray generateNaturalsMatrix(int nrows, int ncols) {
    INDArray col = Nd4j.arange(0, nrows).reshape(nrows, 1).castTo(DataType.DOUBLE);
    INDArray points = Nd4j.create(DataType.DOUBLE, nrows, ncols);
    if (points.isColumnVectorOrScalar())
        points = col.dup();
    else {
        for (int i = 0; i < ncols; i++)
            points.putColumn(i, col);
    }
    return points;
}
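
The helper fills every column with the naturals 0..nrows-1, for example (sketch):

INDArray m = generateNaturalsMatrix(4, 3);
// [[0, 0, 0],
//  [1, 1, 1],
//  [2, 2, 2],
//  [3, 3, 3]]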
 
Example 15
Source File: TestSparkComputationGraph.java    From deeplearning4j with Apache License 2.0
@Test
public void testEvaluationAndRocMDS() {
    for( int evalWorkers : new int[]{1, 4, 8}) {

        DataSetIterator iter = new IrisDataSetIterator(5, 150);

        //Make a 2-class version of iris:
        List<MultiDataSet> l = new ArrayList<>();
        iter.reset();
        while (iter.hasNext()) {
            DataSet ds = iter.next();
            INDArray newL = Nd4j.create(ds.getLabels().size(0), 2);
            newL.putColumn(0, ds.getLabels().getColumn(0));
            newL.putColumn(1, ds.getLabels().getColumn(1));
            newL.getColumn(1).addi(ds.getLabels().getColumn(2));

            MultiDataSet mds = new org.nd4j.linalg.dataset.MultiDataSet(ds.getFeatures(), newL);
            l.add(mds);
        }

        MultiDataSetIterator mdsIter = new IteratorMultiDataSetIterator(l.iterator(), 5);

        ComputationGraph cg = getBasicNetIris2Class();

        IEvaluation[] es = cg.doEvaluation(mdsIter, new Evaluation(), new ROC(32));
        Evaluation e = (Evaluation) es[0];
        ROC roc = (ROC) es[1];


        SparkComputationGraph scg = new SparkComputationGraph(sc, cg, null);
        scg.setDefaultEvaluationWorkers(evalWorkers);

        JavaRDD<MultiDataSet> rdd = sc.parallelize(l);
        rdd = rdd.repartition(20);

        IEvaluation[] es2 = scg.doEvaluationMDS(rdd, 5, new Evaluation(), new ROC(32));
        Evaluation e2 = (Evaluation) es2[0];
        ROC roc2 = (ROC) es2[1];


        assertEquals(e2.accuracy(), e.accuracy(), 1e-3);
        assertEquals(e2.f1(), e.f1(), 1e-3);
        assertEquals(e2.getNumRowCounter(), e.getNumRowCounter(), 1e-3);
        assertEquals(e2.falseNegatives(), e.falseNegatives());
        assertEquals(e2.falsePositives(), e.falsePositives());
        assertEquals(e2.trueNegatives(), e.trueNegatives());
        assertEquals(e2.truePositives(), e.truePositives());
        assertEquals(e2.precision(), e.precision(), 1e-3);
        assertEquals(e2.recall(), e.recall(), 1e-3);
        assertEquals(e2.getConfusionMatrix(), e.getConfusionMatrix());

        assertEquals(roc.calculateAUC(), roc2.calculateAUC(), 1e-5);
        assertEquals(roc.calculateAUCPR(), roc2.calculateAUCPR(), 1e-5);
    }
}
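
The putColumn calls in the loop above collapse the 3-class one-hot iris labels [c0, c1, c2] into two classes [c0, c1 + c2]. In isolation (sketch):

INDArray labels3 = Nd4j.create(new double[][]{{1, 0, 0}, {0, 1, 0}, {0, 0, 1}});
INDArray labels2 = Nd4j.create(DataType.DOUBLE, 3, 2);
labels2.putColumn(0, labels3.getColumn(0));
labels2.putColumn(1, labels3.getColumn(1));
labels2.getColumn(1).addi(labels3.getColumn(2));  // merge classes 1 and 2, as in the test
// labels2: [[1, 0], [0, 1], [0, 1]]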
 
Example 16
Source File: ReductionBpOpValidation.java    From deeplearning4j with Apache License 2.0
@Test
public void testProdAlongDimensionBP() {
    //dL/dIn_i  = dL/dOut * dOut/dIn_i
    //          = dL/dOut * d(prod(in))/dIn_i
    //          = dL/dOut * (prod(in) / in_i)

    for (boolean keepDims : new boolean[]{false, true}) {
        long[] reducedShape_0 = (keepDims ? new long[]{1, 4} : new long[]{4});
        INDArray preReduceInput = Nd4j.linspace(1, 12, 12).reshape(3, 4);
        INDArray prod_0 = preReduceInput.prod(0);
        INDArray dLdOut_0 = Nd4j.create(new double[]{1, 2, 3, 4}, reducedShape_0);
        INDArray dLdInExpected_0 = Nd4j.create(3, 4);
        for (int i = 0; i < 3; i++) {
            dLdInExpected_0.putRow(i, prod_0);
        }
        dLdInExpected_0.divi(preReduceInput);   //Currently: prod(in)/in_i (along dim 0)
        dLdInExpected_0.muliRowVector(dLdOut_0);
        //System.out.println(dLdInExpected_0);
        /*
        [[   45.0000,  120.0000,  231.0000,  384.0000],
         [    9.0000,   40.0000,   99.0000,  192.0000],
         [    5.0000,   24.0000,   63.0000,  128.0000]]
         */

        INDArray dLdIn = Nd4j.createUninitialized(3, 4);

        String err = OpValidation.validate(new OpTestCase(new ProdBp(preReduceInput, dLdOut_0, dLdIn, keepDims, 0))
                .expectedOutput(0, dLdInExpected_0));
        assertNull(err);


        long[] reducedShape_1 = (keepDims ? new long[]{3, 1} : new long[]{3});
        INDArray dLdOut_1 = Nd4j.create(new double[]{1, 2, 3}, reducedShape_1);
        INDArray prod_1 = preReduceInput.prod(1);
        INDArray dLdInExpected_1 = Nd4j.create(3, 4);
        for (int i = 0; i < 4; i++) {
            dLdInExpected_1.putColumn(i, prod_1);
        }
        dLdInExpected_1.divi(preReduceInput);
        dLdInExpected_1.muliColumnVector(dLdOut_1.reshape(3, 1));    //Reshape is a hack around https://github.com/deeplearning4j/deeplearning4j/issues/5530
        //System.out.println(dLdInExpected_1);
        /*
        [[   24.0000,   12.0000,    8.0000,    6.0000],
         [  672.0000,  560.0000,  480.0000,  420.0000],
         [ 3960.0000, 3564.0000, 3240.0000, 2970.0000]]
         */


        dLdIn = Nd4j.createUninitialized(3, 4);
        err = OpValidation.validate(new OpTestCase(new ProdBp(preReduceInput, dLdOut_1, dLdIn, keepDims, 1))
                .expectedOutput(0, dLdInExpected_1));

        assertNull(err, err);
    }
}
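
A quick sanity check of the identity d(prod)/d(in_i) = prod(in) / in_i for a single row (sketch); with dL/dOut = 1 this reproduces row 0 of the expected matrix above:

INDArray in = Nd4j.create(new double[]{1, 2, 3, 4});
double prod = in.prodNumber().doubleValue();                  // 24
INDArray grad = Nd4j.valueArrayOf(in.shape(), prod).div(in);  // [24, 12, 8, 6]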
 
Example 17
Source File: TestSparkComputationGraph.java    From deeplearning4j with Apache License 2.0
@Test(timeout = 60000L)
public void testEvaluationAndRoc() {
    for( int evalWorkers : new int[]{1, 4, 8}) {
        DataSetIterator iter = new IrisDataSetIterator(5, 150);

        //Make a 2-class version of iris:
        List<DataSet> l = new ArrayList<>();
        iter.reset();
        while (iter.hasNext()) {
            DataSet ds = iter.next();
            INDArray newL = Nd4j.create(ds.getLabels().size(0), 2);
            newL.putColumn(0, ds.getLabels().getColumn(0));
            newL.putColumn(1, ds.getLabels().getColumn(1));
            newL.getColumn(1).addi(ds.getLabels().getColumn(2));
            ds.setLabels(newL);
            l.add(ds);
        }

        iter = new ListDataSetIterator<>(l);

        ComputationGraph cg = getBasicNetIris2Class();

        Evaluation e = cg.evaluate(iter);
        ROC roc = cg.evaluateROC(iter, 32);


        SparkComputationGraph scg = new SparkComputationGraph(sc, cg, null);
        scg.setDefaultEvaluationWorkers(evalWorkers);


        JavaRDD<DataSet> rdd = sc.parallelize(l);
        rdd = rdd.repartition(20);

        Evaluation e2 = scg.evaluate(rdd);
        ROC roc2 = scg.evaluateROC(rdd);


        assertEquals(e2.accuracy(), e.accuracy(), 1e-3);
        assertEquals(e2.f1(), e.f1(), 1e-3);
        assertEquals(e2.getNumRowCounter(), e.getNumRowCounter(), 1e-3);
        assertEquals(e2.falseNegatives(), e.falseNegatives());
        assertEquals(e2.falsePositives(), e.falsePositives());
        assertEquals(e2.trueNegatives(), e.trueNegatives());
        assertEquals(e2.truePositives(), e.truePositives());
        assertEquals(e2.precision(), e.precision(), 1e-3);
        assertEquals(e2.recall(), e.recall(), 1e-3);
        assertEquals(e2.getConfusionMatrix(), e.getConfusionMatrix());

        assertEquals(roc.calculateAUC(), roc2.calculateAUC(), 1e-5);
        assertEquals(roc.calculateAUCPR(), roc2.calculateAUCPR(), 1e-5);
    }
}
 
Example 18
Source File: RelationalDataSetIterator.java    From wekaDeeplearning4j with GNU General Public License v3.0
@Override
public DataSet next(int num) {
  List<Instances> currentBatch = new ArrayList<>(num);
  List<Double> lbls = new ArrayList<>(num);

  for (int i = 0; i < num && cursor + i < data.numInstances(); i++) {
    currentBatch.add(data.get(cursor + i).relationalValue(relationalAttributeIndex));
    lbls.add(data.get(cursor + i).classValue());
  }

  final int currentBatchSize = currentBatch.size();

  int maxLength = 0;
  for (Instances instances : currentBatch) {
    maxLength = Math.max(maxLength, instances.numInstances());
  }

  // If the longest instance exceeds 'truncateLength', only take the first 'truncateLength'
  // instances; also fall back to 'truncateLength' when the batch is empty
  if (maxLength > truncateLength || maxLength == 0) {
    maxLength = truncateLength;
  }

  // Create data for training
  INDArray features = Nd4j.create(new int[]{currentBatchSize, numFeatures, maxLength}, 'f');
  INDArray labels = Nd4j.create(new int[]{currentBatchSize, data.numClasses(), maxLength}, 'f');

  // Because we are dealing with instances of different lengths and only one output at the final
  // time step: use padding and mask arrays
  // Mask arrays contain 1 if data is present at that time step for that example, or 0 if data is
  // just padding
  INDArray featuresMask = Nd4j.zeros(currentBatchSize, maxLength);
  INDArray labelsMask = Nd4j.zeros(currentBatchSize, maxLength);

  for (int i = 0; i < currentBatchSize; i++) {
    Instances currInstances = currentBatch.get(i);

    // Check for empty row
    final int currNumInstances = currInstances.numInstances();
    if (currNumInstances == 0) {
      continue;
    }

    // Get the sequence length of row (i)
    int lastIdx = Math.min(currNumInstances, maxLength);

    // Matrix that will represent the current row/instances object
    INDArray currDataND = Nd4j.create(numFeatures, lastIdx);

    // Iterate over truncated number of instances for the current row
    for (int j = 0; j < lastIdx; j++) {
      // Get as double array
      final double[] doubles = currInstances.get(j).toDoubleArray();
      final INDArray indArray = Nd4j.create(doubles);
      currDataND.putColumn(j, indArray);
    }

    features.put(new INDArrayIndex[]{point(i), all(), interval(0, lastIdx)}, currDataND);

    // Assign "1" to each position where a feature is present, that is, in the interval of
    // [0, lastIdx)
    featuresMask.get(new INDArrayIndex[]{point(i), interval(0, lastIdx)}).assign(1);

    /*
     Put the labels in the labels and labelsMask arrays
    */

    // Differ between classification and regression task
    if (data.numClasses() == 1) { // Regression
      double val = lbls.get(i);
      labels.putScalar(new int[]{i, 0, lastIdx - 1}, val);
    } else if (data.numClasses() > 1) { // Classification
      // One-Hot-Encoded class
      int idx = lbls.get(i).intValue();
      // Set label
      labels.putScalar(new int[]{i, idx, lastIdx - 1}, 1.0);
    } else {
      throw new RuntimeException("Could not detect classification or regression task.");
    }

    // Specify that an output exists at the final time step for this example
    labelsMask.putScalar(new int[]{i, lastIdx - 1}, 1.0);
  }

  // Cache the dataset
  final DataSet ds = new DataSet(features, labels, featuresMask, labelsMask);

  // Move cursor
  cursor += ds.numExamples();
  return ds;
}
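
To illustrate the masking scheme for a batch of two sequences with maxLength = 4 and true lengths 3 and 2 (a sketch, using the same static point/interval imports as the method above):

INDArray featuresMask = Nd4j.zeros(2, 4);
featuresMask.get(point(0), interval(0, 3)).assign(1);  // sequence 0: steps 0..2 hold real data
featuresMask.get(point(1), interval(0, 2)).assign(1);  // sequence 1: steps 0..1 hold real data
INDArray labelsMask = Nd4j.zeros(2, 4);
labelsMask.putScalar(new int[]{0, 2}, 1.0);            // label emitted at the last step of sequence 0
labelsMask.putScalar(new int[]{1, 1}, 1.0);            // label emitted at the last step of sequence 1
// featuresMask: [[1, 1, 1, 0],    labelsMask: [[0, 0, 1, 0],
//                [1, 1, 0, 0]]                 [0, 1, 0, 0]]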
 
Example 19
Source File: PCA.java    From deeplearning4j with Apache License 2.0
/**
 * Calculates the PCA vectors of a matrix for a given variance. A larger variance (e.g. 99%)
 * will result in a feature set with more components.
 *
 * To use the returned factor: multiply feature(s) by the factor to get a reduced dimension
 *
 * INDArray Areduced = A.mmul( factor ) ;
 * 
 * The array Areduced is a projection of A onto principal components
 *
 * @see pca(INDArray, double, boolean)
 *
 * @param A the array of features, rows are results, columns are features - will be changed
 * @param variance the amount of variance to preserve as a float 0 - 1
 * @param normalize whether to normalize (set features to have zero mean)
 * @return the matrix to multiply a feature by to get a reduced feature set
 */
public static INDArray pca_factor(INDArray A, double variance, boolean normalize) {
    if (normalize) {
        // Normalize to mean 0 for each feature ( each column has 0 mean )
        INDArray mean = A.mean(0);
        A.subiRowVector(mean);
    }

    long m = A.rows();
    long n = A.columns();

    // Prepare the SVD results; we'll decompose A into U x S x V'
    INDArray s = Nd4j.create(A.dataType(), m < n ? m : n);
    INDArray VT = Nd4j.create(A.dataType(), new long[]{n, n}, 'f');

    // Note - we don't care about U 
    Nd4j.getBlasWrapper().lapack().gesvd(A, s, null, VT);

    // Now convert the eigs of X into the eigs of the covariance matrix
    for (int i = 0; i < s.length(); i++) {
        s.putScalar(i, Math.sqrt(s.getDouble(i)) / (m - 1));
    }

    // Now find how many features we need to preserve the required variance
    // Which is the same percentage as a cumulative sum of the eigenvalues' percentages
    double totalEigSum = s.sumNumber().doubleValue() * variance;
    int k = -1; // we will reduce to k dimensions
    double runningTotal = 0;
    for (int i = 0; i < s.length(); i++) {
        runningTotal += s.getDouble(i);
        if (runningTotal >= totalEigSum) { // OK I know it's a float, but what else can we do ?
            k = i + 1; // we will keep this many features to preserve the reqd. variance
            break;
        }
    }
    if (k == -1) { // if we need everything
        throw new RuntimeException("No reduction possible for reqd. variance - use smaller variance");
    }
    // So now let's rip out the appropriate number of right singular vectors from
    // the V output (pulling columns of V, i.e. rows of VT, since VT is the transpose of V)
    INDArray V = VT.transpose();
    INDArray factor = Nd4j.createUninitialized(A.dataType(), new long[]{n, k}, 'f');
    for (int i = 0; i < k; i++) {
        factor.putColumn(i, V.getColumn(i));
    }

    return factor;
}
 
Example 20
Source File: NDArrayTestsFortran.java    From deeplearning4j with Apache License 2.0
@Test
public void testColumns() {
    INDArray arr = Nd4j.create(new long[] {3, 2}).castTo(DataType.DOUBLE);
    INDArray column = Nd4j.create(new double[] {1, 2, 3});
    arr.putColumn(0, column);

    INDArray firstColumn = arr.getColumn(0);

    assertEquals(column, firstColumn);


    INDArray column1 = Nd4j.create(new double[] {4, 5, 6});
    arr.putColumn(1, column1);
    INDArray testRow1 = arr.getColumn(1);
    assertEquals(column1, testRow1);


    INDArray evenArr = Nd4j.create(new double[] {1, 2, 3, 4}, new long[] {2, 2});
    INDArray put = Nd4j.create(new double[] {5, 6});
    evenArr.putColumn(1, put);
    INDArray testColumn = evenArr.getColumn(1);
    assertEquals(put, testColumn);


    INDArray n = Nd4j.create(Nd4j.linspace(1, 4, 4, DataType.DOUBLE).data(), new long[] {2, 2}).castTo(DataType.DOUBLE);
    INDArray column23 = n.getColumn(0);
    INDArray column12 = Nd4j.create(new double[] {1, 2});
    assertEquals(column23, column12);


    INDArray column0 = n.getColumn(1);
    INDArray column01 = Nd4j.create(new double[] {3, 4});
    assertEquals(column0, column01);


}