Java Code Examples for org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize#transform()

The following examples show how to use org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize#transform(). They are drawn from open-source projects; you can go to the original project or source file by following the links above each example.
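Before the project examples, here is a minimal, self-contained sketch of the typical fit/transform/revert cycle; the dataset shape and variable names are illustrative and not taken from any of the projects below:

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize;
import org.nd4j.linalg.factory.Nd4j;

// Illustrative data: 100 samples, 4 features
INDArray features = Nd4j.randn(100, 4);
INDArray labels = Nd4j.zeros(100, 1);
DataSet ds = new DataSet(features, labels);

NormalizerStandardize normalizer = new NormalizerStandardize();
normalizer.fit(ds);       // collect per-feature mean and std from the data
normalizer.transform(ds); // standardize in place: (x - mean) / std
// ... train or evaluate on the standardized data ...
normalizer.revert(ds);    // undo the standardization, restoring the original scale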
Example 1
Source File: PreProcessor3D4DTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testBruteForce4d() {
    Construct4dDataSet imageDataSet = new Construct4dDataSet(10, 5, 10, 15);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMean, myNormalizer.getMean());

    float aat = Transforms.abs(myNormalizer.getStd().div(imageDataSet.expectedStd).sub(1)).maxNumber().floatValue();
    float abt = myNormalizer.getStd().maxNumber().floatValue();
    float act = imageDataSet.expectedStd.maxNumber().floatValue();
    System.out.println("ValA: " + aat);
    System.out.println("ValB: " + abt);
    System.out.println("ValC: " + act);
    assertTrue(aat < 0.05);

    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();
    myMinMaxScaler.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMin, myMinMaxScaler.getMin());
    assertEquals(imageDataSet.expectedMax, myMinMaxScaler.getMax());

    DataSet copyDataSet = imageDataSet.sampleDataSet.copy();
    myNormalizer.transform(copyDataSet);
}
 
Example 2
Source File: RPTreeTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testRPTree() throws Exception {
    DataSetIterator mnist = new MnistDataSetIterator(150, 150);
    RPTree rpTree = new RPTree(784, 50);
    DataSet d = mnist.next();
    NormalizerStandardize normalizerStandardize = new NormalizerStandardize();
    normalizerStandardize.fit(d);
    normalizerStandardize.transform(d.getFeatures());
    INDArray data = d.getFeatures();
    rpTree.buildTree(data);
    assertEquals(4, rpTree.getLeaves().size());
    assertEquals(0, rpTree.getRoot().getDepth());

    List<Integer> candidates = rpTree.getCandidates(data.getRow(0));
    assertFalse(candidates.isEmpty());
    assertEquals(10, rpTree.query(data.slice(0), 10).length());
    System.out.println(candidates.size());

    rpTree.addNodeAtIndex(150, data.getRow(0));
}
 
Example 3
Source File: NormalizerStandardizeTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testRevert() {
    double tolerancePerc = 0.01; // 0.01% of correct value
    int nSamples = 500;
    int nFeatures = 3;

    INDArray featureSet = Nd4j.randn(nSamples, nFeatures);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    DataSet transformed = sampleDataSet.copy();
    myNormalizer.transform(transformed);
    //System.out.println(transformed.getFeatures());
    myNormalizer.revert(transformed);
    //System.out.println(transformed.getFeatures());
    INDArray delta = Transforms.abs(transformed.getFeatures().sub(sampleDataSet.getFeatures()))
                    .div(sampleDataSet.getFeatures());
    double maxdeltaPerc = delta.max(0, 1).mul(100).getDouble(0);
    assertTrue(maxdeltaPerc < tolerancePerc);
}
 
Example 4
Source File: NormalizerStandardizeTest.java    From nd4j with Apache License 2.0
@Test
public void testRevert() {
    double tolerancePerc = 0.01; // 0.01% of correct value
    int nSamples = 500;
    int nFeatures = 3;

    INDArray featureSet = Nd4j.randn(nSamples, nFeatures);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    DataSet transformed = sampleDataSet.copy();
    myNormalizer.transform(transformed);
    //System.out.println(transformed.getFeatures());
    myNormalizer.revert(transformed);
    //System.out.println(transformed.getFeatures());
    INDArray delta = Transforms.abs(transformed.getFeatures().sub(sampleDataSet.getFeatures()))
                    .div(sampleDataSet.getFeatures());
    double maxdeltaPerc = delta.max(0, 1).mul(100).getDouble(0, 0);
    assertTrue(maxdeltaPerc < tolerancePerc);
}
 
Example 5
Source File: PreProcessor3D4DTest.java    From nd4j with Apache License 2.0
@Test
public void testBruteForce4d() {
    Construct4dDataSet imageDataSet = new Construct4dDataSet(10, 5, 10, 15);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMean, myNormalizer.getMean());

    float aat = Transforms.abs(myNormalizer.getStd().div(imageDataSet.expectedStd).sub(1)).maxNumber().floatValue();
    float abt = myNormalizer.getStd().maxNumber().floatValue();
    float act = imageDataSet.expectedStd.maxNumber().floatValue();
    System.out.println("ValA: " + aat);
    System.out.println("ValB: " + abt);
    System.out.println("ValC: " + act);
    assertTrue(aat < 0.05);

    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();
    myMinMaxScaler.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMin, myMinMaxScaler.getMin());
    assertEquals(imageDataSet.expectedMax, myMinMaxScaler.getMax());

    DataSet copyDataSet = imageDataSet.sampleDataSet.copy();
    myNormalizer.transform(copyDataSet);
}
 
Example 6
Source File: NormalizerStandardizeTest.java    From nd4j with Apache License 2.0
@Test
public void testUnderOverflow() {
    // This dataset is essentially constant with a small std deviation,
    // and the constant is large. Check that the algorithm handles it without under/overflow.
    double tolerancePerc = 1; //Within 1 %
    double toleranceAbs = 0.0005;
    int nSamples = 1000;
    int bSize = 10;
    int x = -1000000, y = 1000000;
    double z = 1000000;

    INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
    INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
    INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);

    INDArray theoreticalMean = Nd4j.create(new double[] {x, y, z});

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleIter);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray meanDeltaPerc = meanDelta.mul(100).div(theoreticalMean);
    assertTrue(meanDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);

    //this just has to not barf
    //myNormalizer.transform(sampleIter);
    myNormalizer.transform(sampleDataSet);
}
 
Example 7
Source File: NormalizerStandardizeTest.java    From nd4j with Apache License 2.0
@Test
public void testConstant() {
    double tolerancePerc = 10.0; // 10% of correct value
    int nSamples = 500;
    int nFeatures = 3;
    int constant = 100;

    INDArray featureSet = Nd4j.zeros(nSamples, nFeatures).add(constant);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);


    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    //Check that we don't get NaNs
    assertFalse(Double.isNaN(myNormalizer.getStd().getDouble(0)));

    myNormalizer.transform(sampleDataSet);
    //Check that we don't get NaNs, since the std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    //Check that transformed values are close enough to zero
    assertEquals(Transforms.abs(sampleDataSet.getFeatures()).max(0, 1).getDouble(0, 0), 0,
                    constant * tolerancePerc / 100.0);

    myNormalizer.revert(sampleDataSet);
    //Check that we don't get NaNs, since the std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    assertEquals(Transforms.abs(sampleDataSet.getFeatures().sub(featureSet)).min(0, 1).getDouble(0), 0,
                    constant * tolerancePerc / 100.0);
}
 
Example 8
Source File: DataSet.java    From nd4j with Apache License 2.0
@Override
public void normalize() {
    //FeatureUtil.normalizeMatrix(getFeatures());
    NormalizerStandardize inClassPreProcessor = new NormalizerStandardize();
    inClassPreProcessor.fit(this);
    inClassPreProcessor.transform(this);
}
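For context, this normalize() override simply fits a NormalizerStandardize on the DataSet itself and applies it in place. A minimal usage sketch (the dataset shape and variable names are illustrative):

DataSet ds = new DataSet(Nd4j.randn(100, 4), Nd4j.zeros(100, 1));
ds.normalize(); // fits and applies NormalizerStandardize to the features in place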
 
Example 9
Source File: NormalizationTests.java    From DataVec with Apache License 2.0
@Test
public void testMeanStdZeros() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));

    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }

    }

    INDArray arr = RecordConverter.toMatrix(data);

    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    DataRowsFacade dataFrame = DataFrames.toDataFrame(schema, rdd);

    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));
    List<Row> rows = Normalization.stdDevMeanColumns(dataFrame, dataFrame.get().columns());
    INDArray assertion = DataFrames.toMatrix(rows);
    //compare standard deviation
    assertTrue(standardScaler.getStd().equalsWithEps(assertion.getRow(0), 1e-1));
    //compare mean
    assertTrue(standardScaler.getMean().equalsWithEps(assertion.getRow(1), 1e-1));

}
 
Example 10
Source File: NormalizerStandardizeTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testUnderOverflow() {
    // This dataset is essentially constant with a small std deviation,
    // and the constant is large. Check that the algorithm handles it without under/overflow.
    double tolerancePerc = 1; //Within 1 %
    double toleranceAbs = 0.0005;
    int nSamples = 1000;
    int bSize = 10;
    int x = -1000000, y = 1000000;
    double z = 1000000;

    INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
    INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
    INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);

    INDArray theoreticalMean = Nd4j.create(new float[] {x, y, (float) z}).castTo(Nd4j.defaultFloatingPointType()).reshape(1, -1);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleIter);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray meanDeltaPerc = meanDelta.mul(100).div(theoreticalMean);
    assertTrue(meanDeltaPerc.max(1).getDouble(0) < tolerancePerc);

    //this just has to not barf
    //myNormalizer.transform(sampleIter);
    myNormalizer.transform(sampleDataSet);
}
 
Example 11
Source File: NormalizerStandardizeTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testConstant() {
    double tolerancePerc = 10.0; // 10% of correct value
    int nSamples = 500;
    int nFeatures = 3;
    int constant = 100;

    INDArray featureSet = Nd4j.zeros(nSamples, nFeatures).add(constant);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);


    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    //Check that we don't get NaNs
    assertFalse(Double.isNaN(myNormalizer.getStd().getDouble(0)));

    myNormalizer.transform(sampleDataSet);
    //Check that we don't get NaNs, since the std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    //Check that transformed values are close enough to zero
    assertEquals(Transforms.abs(sampleDataSet.getFeatures()).max(0, 1).getDouble(0), 0,
                    constant * tolerancePerc / 100.0);

    myNormalizer.revert(sampleDataSet);
    //Check that we don't get NaNs, since the std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    assertEquals(Transforms.abs(sampleDataSet.getFeatures().sub(featureSet)).min(0, 1).getDouble(0), 0,
                    constant * tolerancePerc / 100.0);
}
 
Example 12
Source File: DataSet.java    From deeplearning4j with Apache License 2.0
@Override
public void normalize() {
    //FeatureUtil.normalizeMatrix(getFeatures());
    NormalizerStandardize inClassPreProcessor = new NormalizerStandardize();
    inClassPreProcessor.fit(this);
    inClassPreProcessor.transform(this);
}
 
Example 13
Source File: EvaluationToolsTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testRocMultiToHtml() throws Exception {
    DataSetIterator iter = new IrisDataSetIterator(150, 150);

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER).list()
                    .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build())
                    .layer(1, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX)
                                    .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                    .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    NormalizerStandardize ns = new NormalizerStandardize();
    DataSet ds = iter.next();
    ns.fit(ds);
    ns.transform(ds);

    for (int i = 0; i < 30; i++) {
        net.fit(ds);
    }

    for (int numSteps : new int[] {20, 0}) {
        ROCMultiClass roc = new ROCMultiClass(numSteps);
        iter.reset();

        INDArray f = ds.getFeatures();
        INDArray l = ds.getLabels();
        INDArray out = net.output(f);
        roc.eval(l, out);

        String str = EvaluationTools.rocChartToHtml(roc, Arrays.asList("setosa", "versicolor", "virginica"));
        // System.out.println(str);
    }
}
 
Example 14
Source File: NormalizationTests.java    From deeplearning4j with Apache License 2.0
@Test
public void normalizationTests() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));

    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }

    }

    INDArray arr = RecordConverter.toMatrix(DataType.DOUBLE, data);

    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    assertEquals(schema, DataFrames.fromStructType(DataFrames.fromSchema(schema)));
    assertEquals(rdd.collect(), DataFrames.toRecords(DataFrames.toDataFrame(schema, rdd)).getSecond().collect());

    Dataset<Row> dataFrame = DataFrames.toDataFrame(schema, rdd);
    dataFrame.show();
    Normalization.zeromeanUnitVariance(dataFrame).show();
    Normalization.normalize(dataFrame).show();

    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));

    INDArray zeroMeanUnitVarianceDataFrame =
                    RecordConverter.toMatrix(DataType.DOUBLE, Normalization.zeromeanUnitVariance(schema, rdd).collect());
    INDArray zeroMeanUnitVarianceDataFrameZeroToOne =
                    RecordConverter.toMatrix(DataType.DOUBLE, Normalization.normalize(schema, rdd).collect());
    assertEquals(standardScalered, zeroMeanUnitVarianceDataFrame);
    assertTrue(zeroToOnes.equalsWithEps(zeroMeanUnitVarianceDataFrameZeroToOne, 1e-1));

}
 
Example 15
Source File: GradientCheckTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testAutoEncoder() {
    //As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied
    //Need to run gradient through updater, so that L2 can be applied

    Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
    boolean[] characteristic = {false, true}; //If true: run some backprop steps first

    LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
    Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH};

    DataNormalization scaler = new NormalizerMinMaxScaler();
    DataSetIterator iter = new IrisDataSetIterator(150, 150);
    scaler.fit(iter);
    iter.setPreProcessor(scaler);
    DataSet ds = iter.next();
    INDArray input = ds.getFeatures();
    INDArray labels = ds.getLabels();

    NormalizerStandardize norm = new NormalizerStandardize();
    norm.fit(ds);
    norm.transform(ds);

    double[] l2vals = {0.2, 0.0, 0.2};
    double[] l1vals = {0.0, 0.3, 0.3}; //i.e., use l2vals[i] with l1vals[i]

    for (Activation afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                for (int k = 0; k < l2vals.length; k++) {
                    LossFunction lf = lossFunctions[i];
                    Activation outputActivation = outputActivations[i];
                    double l2 = l2vals[k];
                    double l1 = l1vals[k];

                    Nd4j.getRandom().setSeed(12345);
                    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                                    .dataType(DataType.DOUBLE)
                                    .updater(new NoOp())
                                    .l2(l2).l1(l1)
                                    .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                                    .seed(12345L)
                                    .dist(new NormalDistribution(0, 1))
                                    .list()
                                    .layer(0, new AutoEncoder.Builder().nIn(4).nOut(3).activation(afn).build())
                                    .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3)
                                                    .activation(outputActivation).build())
                                    .build();

                    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                    mln.init();

                    String msg;
                    if (doLearningFirst) {
                        //Run a number of iterations of learning
                        mln.setInput(ds.getFeatures());
                        mln.setLabels(ds.getLabels());
                        mln.computeGradientAndScore();
                        double scoreBefore = mln.score();
                        for (int j = 0; j < 10; j++)
                            mln.fit(ds);
                        mln.computeGradientAndScore();
                        double scoreAfter = mln.score();
                        //Can't test in 'characteristic mode of operation' if not learning
                        msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn="
                                        + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                        + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1
                                        + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
                        assertTrue(msg, scoreAfter < scoreBefore);
                    }

                    msg = "testGradMLP2LayerIrisSimple() - activationFn=" + afn + ", lossFn=" + lf
                                    + ", outputActivation=" + outputActivation + ", doLearningFirst="
                                    + doLearningFirst + ", l2=" + l2 + ", l1=" + l1;
                    if (PRINT_RESULTS) {
                        System.out.println(msg);
                        //for (int j = 0; j < mln.getnLayers(); j++)
                        //    System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                    }

                    boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                    assertTrue(msg, gradOK);
                    TestUtils.testModelSerialization(mln);
                }
            }
        }
    }
}
 
Example 16
Source File: EvaluationToolsTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testRocHtml() {

    DataSetIterator iter = new IrisDataSetIterator(150, 150);

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER).list()
                    .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build())
                    .layer(1, new OutputLayer.Builder().nIn(4).nOut(2).activation(Activation.SOFTMAX)
                                    .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                    .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    NormalizerStandardize ns = new NormalizerStandardize();
    DataSet ds = iter.next();
    ns.fit(ds);
    ns.transform(ds);

    INDArray newLabels = Nd4j.create(150, 2);
    newLabels.getColumn(0).assign(ds.getLabels().getColumn(0));
    newLabels.getColumn(0).addi(ds.getLabels().getColumn(1));
    newLabels.getColumn(1).assign(ds.getLabels().getColumn(2));
    ds.setLabels(newLabels);

    for (int i = 0; i < 30; i++) {
        net.fit(ds);
    }

    for (int numSteps : new int[] {20, 0}) {
        ROC roc = new ROC(numSteps);
        iter.reset();

        INDArray f = ds.getFeatures();
        INDArray l = ds.getLabels();
        INDArray out = net.output(f);
        roc.eval(l, out);

        String str = EvaluationTools.rocChartToHtml(roc);
        // System.out.println(str);
    }
}
 
Example 17
Source File: NormalizationTests.java    From DataVec with Apache License 2.0
@Test
public void normalizationTests() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));

    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }

    }

    INDArray arr = RecordConverter.toMatrix(data);

    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    assertEquals(schema, DataFrames.fromStructType(DataFrames.fromSchema(schema)));
    assertEquals(rdd.collect(), DataFrames.toRecords(DataFrames.toDataFrame(schema, rdd)).getSecond().collect());

    DataRowsFacade dataFrame = DataFrames.toDataFrame(schema, rdd);
    dataFrame.get().show();
    Normalization.zeromeanUnitVariance(dataFrame).get().show();
    Normalization.normalize(dataFrame).get().show();

    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));

    INDArray zeroMeanUnitVarianceDataFrame =
                    RecordConverter.toMatrix(Normalization.zeromeanUnitVariance(schema, rdd).collect());
    INDArray zeroMeanUnitVarianceDataFrameZeroToOne =
                    RecordConverter.toMatrix(Normalization.normalize(schema, rdd).collect());
    assertEquals(standardScalered, zeroMeanUnitVarianceDataFrame);
    assertTrue(zeroToOnes.equalsWithEps(zeroMeanUnitVarianceDataFrameZeroToOne, 1e-1));

}