Java Code Examples for org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize#fit()

The following examples show how to use org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize#fit(). The examples are drawn from open source projects; the originating project and source file are noted above each example.
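Before the examples, here is a minimal sketch of the typical workflow (illustrative only; not taken from any of the projects below):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize;
import org.nd4j.linalg.factory.Nd4j;

// Build a toy dataset: 100 samples, 3 features
INDArray features = Nd4j.randn(100, 3);
INDArray labels = Nd4j.zeros(100, 1);
DataSet data = new DataSet(features, labels);

NormalizerStandardize normalizer = new NormalizerStandardize();
normalizer.fit(data);       // collect per-feature mean and std
normalizer.transform(data); // standardize in place: (x - mean) / std
normalizer.revert(data);    // undo the transform, restoring the original scale

fit() also accepts a DataSetIterator, in which case the statistics are accumulated batch by batch; several of the tests below exercise that path, typically followed by iterator.setPreProcessor(normalizer).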
Example 1
Source File: NormalizerStandardizeTest.java    From nd4j with Apache License 2.0
@Test
public void testRevert() {
    double tolerancePerc = 0.01; // 0.01% of correct value
    int nSamples = 500;
    int nFeatures = 3;

    INDArray featureSet = Nd4j.randn(nSamples, nFeatures);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    DataSet transformed = sampleDataSet.copy();
    myNormalizer.transform(transformed);
    //System.out.println(transformed.getFeatures());
    myNormalizer.revert(transformed);
    //System.out.println(transformed.getFeatures());
    INDArray delta = Transforms.abs(transformed.getFeatures().sub(sampleDataSet.getFeatures()))
                    .div(sampleDataSet.getFeatures());
    double maxdeltaPerc = delta.max(0, 1).mul(100).getDouble(0, 0);
    assertTrue(maxdeltaPerc < tolerancePerc);
}
 
Example 2
Source File: PreProcessor3D4DTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testBruteForce4d() {
    Construct4dDataSet imageDataSet = new Construct4dDataSet(10, 5, 10, 15);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMean, myNormalizer.getMean());

    // aat = maximum relative error between the fitted std and the expected std
    float aat = Transforms.abs(myNormalizer.getStd().div(imageDataSet.expectedStd).sub(1)).maxNumber().floatValue();
    float abt = myNormalizer.getStd().maxNumber().floatValue();
    float act = imageDataSet.expectedStd.maxNumber().floatValue();
    System.out.println("ValA: " + aat);
    System.out.println("ValB: " + abt);
    System.out.println("ValC: " + act);
    assertTrue(aat < 0.05);

    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();
    myMinMaxScaler.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMin, myMinMaxScaler.getMin());
    assertEquals(imageDataSet.expectedMax, myMinMaxScaler.getMax());

    DataSet copyDataSet = imageDataSet.sampleDataSet.copy();
    myNormalizer.transform(copyDataSet);
}
 
Example 3
Source File: CustomerRetentionPredictionApi.java    From Java-Deep-Learning-Cookbook with MIT License
public static INDArray generateOutput(File inputFile, String modelFilePath) throws IOException, InterruptedException {
    final File modelFile = new File(modelFilePath);
    final MultiLayerNetwork network = ModelSerializer.restoreMultiLayerNetwork(modelFile);
    final RecordReader recordReader = generateReader(inputFile);
    //final INDArray array = RecordConverter.toArray(recordReader.next());
    final NormalizerStandardize normalizerStandardize = ModelSerializer.restoreNormalizerFromFile(modelFile);
    //normalizerStandardize.transform(array);
    final DataSetIterator dataSetIterator = new RecordReaderDataSetIterator.Builder(recordReader,1).build();
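    // Caution: calling fit() on the iterator below recomputes normalization statistics
    // from the inference data, replacing those restored from the model file above.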
    normalizerStandardize.fit(dataSetIterator);
    dataSetIterator.setPreProcessor(normalizerStandardize);
    return network.output(dataSetIterator);

}
 
Example 4
Source File: NormalizerStandardizeTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testUnderOverflow() {
    // This dataset is nearly constant with a small std deviation, and the constant
    // offset is large. Check that the algorithm handles this without under/overflow.
    double tolerancePerc = 1; //Within 1 %
    double toleranceAbs = 0.0005;
    int nSamples = 1000;
    int bSize = 10;
    int x = -1000000, y = 1000000;
    double z = 1000000;

    INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
    INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
    INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);

    INDArray theoreticalMean = Nd4j.create(new float[] {x, y, (float) z}).castTo(Nd4j.defaultFloatingPointType()).reshape(1, -1);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleIter);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray meanDeltaPerc = meanDelta.mul(100).div(theoreticalMean);
    assertTrue(meanDeltaPerc.max(1).getDouble(0) < tolerancePerc);

    //this just has to not barf
    //myNormalizer.transform(sampleIter);
    myNormalizer.transform(sampleDataSet);
}
 
Example 5
Source File: DataSet.java    From nd4j with Apache License 2.0
@Override
public void normalize() {
    //FeatureUtil.normalizeMatrix(getFeatures());
    NormalizerStandardize inClassPreProcessor = new NormalizerStandardize();
    inClassPreProcessor.fit(this);
    inClassPreProcessor.transform(this);
}
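Callers thus get in-place standardization with a single call; a sketch of the usage (variable names assumed):

DataSet data = new DataSet(features, labels);
data.normalize(); // fits a NormalizerStandardize on this dataset and transforms it in place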
 
Example 6
Source File: NormalizerStandardizeTest.java    From nd4j with Apache License 2.0
@Test
public void testConstant() {
    double tolerancePerc = 10.0; // 10% of correct value
    int nSamples = 500;
    int nFeatures = 3;
    int constant = 100;

    INDArray featureSet = Nd4j.zeros(nSamples, nFeatures).add(constant);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);


    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    //Check that we don't get NaNs
    assertFalse(Double.isNaN(myNormalizer.getStd().getDouble(0)));

    myNormalizer.transform(sampleDataSet);
    //Check that we don't get NaNs, even though the std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    //Checking to see if transformed values are close enough to zero
    assertEquals(Transforms.abs(sampleDataSet.getFeatures()).max(0, 1).getDouble(0, 0), 0,
                    constant * tolerancePerc / 100.0);

    myNormalizer.revert(sampleDataSet);
    //Check that we don't get NaNs, even though the std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    assertEquals(Transforms.abs(sampleDataSet.getFeatures().sub(featureSet)).min(0, 1).getDouble(0), 0,
                    constant * tolerancePerc / 100.0);
}
 
Example 7
Source File: RPTreeTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testRpTreeMaxNodes() throws Exception {
    DataSetIterator mnist = new MnistDataSetIterator(150,150);
    RPForest rpTree = new RPForest(4,4,"euclidean");
    DataSet d = mnist.next();
    NormalizerStandardize normalizerStandardize = new NormalizerStandardize();
    normalizerStandardize.fit(d);
    normalizerStandardize.transform(d); // apply the fitted statistics before building the forest
    rpTree.fit(d.getFeatures());
    for(RPTree tree : rpTree.getTrees()) {
        for(RPNode node : tree.getLeaves()) {
            assertTrue(node.getIndices().size() <= rpTree.getMaxSize());
        }
    }

}
 
Example 8
Source File: NormalizerStandardizeTest.java    From nd4j with Apache License 2.0
@Test
public void testUnderOverflow() {
    // This dataset is nearly constant with a small std deviation, and the constant
    // offset is large. Check that the algorithm handles this without under/overflow.
    double tolerancePerc = 1; //Within 1 %
    double toleranceAbs = 0.0005;
    int nSamples = 1000;
    int bSize = 10;
    int x = -1000000, y = 1000000;
    double z = 1000000;

    INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
    INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
    INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);

    INDArray theoreticalMean = Nd4j.create(new double[] {x, y, z});

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleIter);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray meanDeltaPerc = meanDelta.mul(100).div(theoreticalMean);
    assertTrue(meanDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);

    //this just has to not barf
    //myNormalizer.transform(sampleIter);
    myNormalizer.transform(sampleDataSet);
}
 
Example 9
Source File: DataSet.java    From deeplearning4j with Apache License 2.0
@Override
public void normalize() {
    //FeatureUtil.normalizeMatrix(getFeatures());
    NormalizerStandardize inClassPreProcessor = new NormalizerStandardize();
    inClassPreProcessor.fit(this);
    inClassPreProcessor.transform(this);
}
 
Example 10
Source File: NormalizerStandardizeLabelsTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testBruteForce() {
    /* This test creates a dataset where feature values are multiples of consecutive natural numbers
       The obtained values are compared to the theoretical mean and std dev
     */
    double tolerancePerc = 0.01;
    int nSamples = 5120;
    int x = 1, y = 2, z = 3;

    INDArray featureX = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1).mul(x);
    INDArray featureY = featureX.mul(y);
    INDArray featureZ = featureX.mul(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = featureSet.dup().getColumns(0);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    double meanNaturalNums = (nSamples + 1) / 2.0;
    INDArray theoreticalMean =
                    Nd4j.create(new double[] {meanNaturalNums * x, meanNaturalNums * y, meanNaturalNums * z}).reshape(1, -1).castTo(Nd4j.defaultFloatingPointType());
    INDArray theoreticallabelMean = theoreticalMean.dup().getColumns(0);
    double stdNaturalNums = Math.sqrt((nSamples * nSamples - 1) / 12.0);
    INDArray theoreticalStd =
                    Nd4j.create(new double[] {stdNaturalNums * x, stdNaturalNums * y, stdNaturalNums * z}).reshape(1, -1).castTo(Nd4j.defaultFloatingPointType());
    INDArray theoreticallabelStd = theoreticalStd.dup().getColumns(0);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fitLabel(true);
    myNormalizer.fit(sampleDataSet);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray labelDelta = Transforms.abs(theoreticallabelMean.sub(myNormalizer.getLabelMean()));
    INDArray meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    INDArray labelDeltaPerc = labelDelta.div(theoreticallabelMean).mul(100);
    double maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);
    assertTrue(labelDeltaPerc.max(1).getDouble(0) < tolerancePerc);

    INDArray stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    INDArray stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    INDArray stdlabelDeltaPerc =
                    Transforms.abs(theoreticallabelStd.sub(myNormalizer.getLabelStd())).div(theoreticallabelStd).mul(100);
    double maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0);
    double maxlabelStdDeltaPerc = stdlabelDeltaPerc.max(1).getDouble(0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
    assertTrue(maxlabelStdDeltaPerc < tolerancePerc);


    // SAME TEST WITH THE ITERATOR
    int bSize = 10;
    tolerancePerc = 0.1; // 0.1% of correct value
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    myNormalizer.fit(sampleIter);

    meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);

    stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
}
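The theoretical values used above follow from the population statistics of the first n natural numbers: the mean of 1..n is (n + 1) / 2 and the standard deviation is sqrt((n^2 - 1) / 12); scaling a column by a constant multiplies both statistics by that constant, which is why the per-column factors x, y and z appear in the expected arrays.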
 
Example 11
Source File: NormalizerStandardizeTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testBruteForce() {
    /* This test creates a dataset where feature values are multiples of consecutive natural numbers
       The obtained values are compared to the theoretical mean and std dev
     */
    double tolerancePerc = 0.01; // 0.01% of correct value
    int nSamples = 5120;
    int x = 1, y = 2, z = 3;

    INDArray featureX = Nd4j.linspace(1, nSamples, nSamples, DataType.DOUBLE).reshape(nSamples, 1).mul(x);
    INDArray featureY = featureX.mul(y);
    INDArray featureZ = featureX.mul(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    double meanNaturalNums = (nSamples + 1) / 2.0;
    INDArray theoreticalMean =
                    Nd4j.create(new double[] {meanNaturalNums * x, meanNaturalNums * y, meanNaturalNums * z}).reshape(1, -1);
    double stdNaturalNums = Math.sqrt((nSamples * nSamples - 1) / 12.0);
    INDArray theoreticalStd =
                    Nd4j.create(new double[] {stdNaturalNums * x, stdNaturalNums * y, stdNaturalNums * z}).reshape(1, -1);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    double maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);

    INDArray stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    INDArray stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    double maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);

    // SAME TEST WITH THE ITERATOR
    int bSize = 10;
    tolerancePerc = 0.1; // 0.1% of correct value
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    myNormalizer.fit(sampleIter);

    meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);

    stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
}
 
Example 12
Source File: NormalizationTests.java    From deeplearning4j with Apache License 2.0
@Test
public void normalizationTests() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));

    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }

    }

    INDArray arr = RecordConverter.toMatrix(DataType.DOUBLE, data);

    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    assertEquals(schema, DataFrames.fromStructType(DataFrames.fromSchema(schema)));
    assertEquals(rdd.collect(), DataFrames.toRecords(DataFrames.toDataFrame(schema, rdd)).getSecond().collect());

    Dataset<Row> dataFrame = DataFrames.toDataFrame(schema, rdd);
    dataFrame.show();
    Normalization.zeromeanUnitVariance(dataFrame).show();
    Normalization.normalize(dataFrame).show();

    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));

    INDArray zeroMeanUnitVarianceDataFrame =
                    RecordConverter.toMatrix(DataType.DOUBLE, Normalization.zeromeanUnitVariance(schema, rdd).collect());
    INDArray zeroMeanUnitVarianceDataFrameZeroToOne =
                    RecordConverter.toMatrix(DataType.DOUBLE, Normalization.normalize(schema, rdd).collect());
    assertEquals(standardScalered, zeroMeanUnitVarianceDataFrame);
    assertTrue(zeroToOnes.equalsWithEps(zeroMeanUnitVarianceDataFrameZeroToOne, 1e-1));

}
 
Example 13
Source File: NormalizerStandardizeTest.java    From nd4j with Apache License 2.0
@Test
public void testBruteForce() {
    /* This test creates a dataset where feature values are multiples of consecutive natural numbers
       The obtained values are compared to the theoretical mean and std dev
     */
    double tolerancePerc = 0.01; // 0.01% of correct value
    int nSamples = 5120;
    int x = 1, y = 2, z = 3;

    INDArray featureX = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1).mul(x);
    INDArray featureY = featureX.mul(y);
    INDArray featureZ = featureX.mul(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    double meanNaturalNums = (nSamples + 1) / 2.0;
    INDArray theoreticalMean =
                    Nd4j.create(new double[] {meanNaturalNums * x, meanNaturalNums * y, meanNaturalNums * z});
    double stdNaturalNums = Math.sqrt((nSamples * nSamples - 1) / 12.0);
    INDArray theoreticalStd =
                    Nd4j.create(new double[] {stdNaturalNums * x, stdNaturalNums * y, stdNaturalNums * z});

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    double maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);

    INDArray stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    INDArray stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    double maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);

    // SAME TEST WITH THE ITERATOR
    int bSize = 10;
    tolerancePerc = 0.1; // 0.1% of correct value
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    myNormalizer.fit(sampleIter);

    meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);

    stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
}
 
Example 14
Source File: PreProcessor3D4DTest.java    From nd4j with Apache License 2.0
@Test
public void testBruteForce3dMaskLabels() {

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fitLabel(true);
    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();
    myMinMaxScaler.fitLabel(true);

    //generating a dataset with consecutive numbers as feature values. Dataset also has masks
    int samples = 100;
    INDArray featureScale = Nd4j.create(new float[] {1, 2, 10}).reshape(3, 1);
    int timeStepsU = 5;
    Construct3dDataSet sampleU = new Construct3dDataSet(featureScale, timeStepsU, samples, 1);
    int timeStepsV = 3;
    Construct3dDataSet sampleV = new Construct3dDataSet(featureScale, timeStepsV, samples, sampleU.newOrigin);
    List<DataSet> dataSetList = new ArrayList<DataSet>();
    dataSetList.add(sampleU.sampleDataSet);
    dataSetList.add(sampleV.sampleDataSet);

    DataSet fullDataSetA = DataSet.merge(dataSetList);
    DataSet fullDataSetAA = fullDataSetA.copy();
    //This should be the same dataset as above, without a mask
    Construct3dDataSet fullDataSetNoMask =
                    new Construct3dDataSet(featureScale, timeStepsU + timeStepsV, samples, 1);

    //preprocessors - label and feature values are the same
    myNormalizer.fit(fullDataSetA);
    assertEquals(myNormalizer.getMean(), fullDataSetNoMask.expectedMean);
    assertEquals(myNormalizer.getStd(), fullDataSetNoMask.expectedStd);
    assertEquals(myNormalizer.getLabelMean(), fullDataSetNoMask.expectedMean);
    assertEquals(myNormalizer.getLabelStd(), fullDataSetNoMask.expectedStd);

    myMinMaxScaler.fit(fullDataSetAA);
    assertEquals(myMinMaxScaler.getMin(), fullDataSetNoMask.expectedMin);
    assertEquals(myMinMaxScaler.getMax(), fullDataSetNoMask.expectedMax);
    assertEquals(myMinMaxScaler.getLabelMin(), fullDataSetNoMask.expectedMin);
    assertEquals(myMinMaxScaler.getLabelMax(), fullDataSetNoMask.expectedMax);


    //Same Test with an Iterator, values should be close for std, exact for everything else
    DataSetIterator sampleIterA = new TestDataSetIterator(fullDataSetA, 5);
    DataSetIterator sampleIterB = new TestDataSetIterator(fullDataSetAA, 5);

    myNormalizer.fit(sampleIterA);
    assertEquals(myNormalizer.getMean(), fullDataSetNoMask.expectedMean);
    assertEquals(myNormalizer.getLabelMean(), fullDataSetNoMask.expectedMean);
    assertTrue(Transforms.abs(myNormalizer.getStd().div(fullDataSetNoMask.expectedStd).sub(1)).maxNumber()
                    .floatValue() < 0.01);
    assertTrue(Transforms.abs(myNormalizer.getLabelStd().div(fullDataSetNoMask.expectedStd).sub(1)).maxNumber()
                    .floatValue() < 0.01);

    myMinMaxScaler.fit(sampleIterB);
    assertEquals(myMinMaxScaler.getMin(), fullDataSetNoMask.expectedMin);
    assertEquals(myMinMaxScaler.getMax(), fullDataSetNoMask.expectedMax);
    assertEquals(myMinMaxScaler.getLabelMin(), fullDataSetNoMask.expectedMin);
    assertEquals(myMinMaxScaler.getLabelMax(), fullDataSetNoMask.expectedMax);
}
 
Example 15
Source File: ModelSerializerTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testRestoreUnsavedNormalizerFromInputStream() throws Exception {
    DataSet dataSet = trivialDataSet();

    NormalizerStandardize norm = new NormalizerStandardize();
    norm.fit(dataSet);

    ComputationGraph cg = simpleComputationGraph();
    cg.init();

    File tempFile = tempDir.newFile();
    ModelSerializer.writeModel(cg, tempFile, true);

    FileInputStream fis = new FileInputStream(tempFile);

    NormalizerStandardize restored = ModelSerializer.restoreNormalizerFromInputStream(fis);

    assertEquals(null, restored);
}
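The restore above yields null because the model was written without a normalizer. For contrast, here is a minimal sketch of the positive path, assuming ModelSerializer.addNormalizerToModel is available (as in recent DL4J versions) and reusing the names from the test:

NormalizerStandardize norm = new NormalizerStandardize();
norm.fit(dataSet);                                    // fit on the training data
ModelSerializer.writeModel(cg, tempFile, true);       // save the network
ModelSerializer.addNormalizerToModel(tempFile, norm); // attach the fitted normalizer

NormalizerStandardize restored = ModelSerializer.restoreNormalizerFromFile(tempFile);
// restored now carries the training-time mean/std; no refit is needed at inference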
 
Example 16
Source File: GradientCheckTests.java    From deeplearning4j with Apache License 2.0
@Test
    public void testAutoEncoder() {
        //As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied
        //Need to run gradient through updater, so that L2 can be applied

        Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
        boolean[] characteristic = {false, true}; //If true: run some backprop steps first

        LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH};

        DataNormalization scaler = new NormalizerMinMaxScaler();
        DataSetIterator iter = new IrisDataSetIterator(150, 150);
        scaler.fit(iter);
        iter.setPreProcessor(scaler);
        DataSet ds = iter.next();
        INDArray input = ds.getFeatures();
        INDArray labels = ds.getLabels();

        NormalizerStandardize norm = new NormalizerStandardize();
        norm.fit(ds);
        norm.transform(ds);

        double[] l2vals = {0.2, 0.0, 0.2};
        double[] l1vals = {0.0, 0.3, 0.3}; //i.e., use l2vals[i] with l1vals[i]

        for (Activation afn : activFns) {
            for (boolean doLearningFirst : characteristic) {
                for (int i = 0; i < lossFunctions.length; i++) {
                    for (int k = 0; k < l2vals.length; k++) {
                        LossFunction lf = lossFunctions[i];
                        Activation outputActivation = outputActivations[i];
                        double l2 = l2vals[k];
                        double l1 = l1vals[k];

                        Nd4j.getRandom().setSeed(12345);
                        MultiLayerConfiguration conf =
                                        new NeuralNetConfiguration.Builder()
                                                        .dataType(DataType.DOUBLE)
                                                        .updater(new NoOp())
                                                        .l2(l2).l1(l1)
                                                        .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                                                        .seed(12345L)
                                                        .dist(new NormalDistribution(0, 1))
                                                        .list().layer(0,
                                                                        new AutoEncoder.Builder().nIn(4).nOut(3)
                                                                                        .activation(afn).build())
                                                        .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3)
                                                                        .activation(outputActivation).build())
                                                        .build();

                        MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                        mln.init();

                        String msg;
                        if (doLearningFirst) {
                            //Run a number of iterations of learning
                            mln.setInput(ds.getFeatures());
                            mln.setLabels(ds.getLabels());
                            mln.computeGradientAndScore();
                            double scoreBefore = mln.score();
                            for (int j = 0; j < 10; j++)
                                mln.fit(ds);
                            mln.computeGradientAndScore();
                            double scoreAfter = mln.score();
                            //Can't test in 'characteristic mode of operation' if not learning
                            msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn="
                                            + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                            + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1
                                            + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
                            assertTrue(msg, scoreAfter < scoreBefore);
                        }

                        msg = "testGradMLP2LayerIrisSimple() - activationFn=" + afn + ", lossFn=" + lf
                                        + ", outputActivation=" + outputActivation + ", doLearningFirst="
                                        + doLearningFirst + ", l2=" + l2 + ", l1=" + l1;
                        if (PRINT_RESULTS) {
                            System.out.println(msg);
//                            for (int j = 0; j < mln.getnLayers(); j++)
//                                System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                        }

                        boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                        assertTrue(msg, gradOK);
                        TestUtils.testModelSerialization(mln);
                    }
                }
            }
        }
    }
 
Example 17
Source File: NormalizerStandardizeLabelsTest.java    From nd4j with Apache License 2.0
@Test
public void testBruteForce() {
    /* This test creates a dataset where feature values are multiples of consecutive natural numbers
       The obtained values are compared to the theoretical mean and std dev
     */
    double tolerancePerc = 0.01;
    int nSamples = 5120;
    int x = 1, y = 2, z = 3;

    INDArray featureX = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1).mul(x);
    INDArray featureY = featureX.mul(y);
    INDArray featureZ = featureX.mul(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = featureSet.dup().getColumns(new int[] {0});
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    double meanNaturalNums = (nSamples + 1) / 2.0;
    INDArray theoreticalMean =
                    Nd4j.create(new double[] {meanNaturalNums * x, meanNaturalNums * y, meanNaturalNums * z});
    INDArray theoreticallabelMean = theoreticalMean.dup().getColumns(new int[] {0});
    double stdNaturalNums = Math.sqrt((nSamples * nSamples - 1) / 12.0);
    INDArray theoreticalStd =
                    Nd4j.create(new double[] {stdNaturalNums * x, stdNaturalNums * y, stdNaturalNums * z});
    INDArray theoreticallabelStd = theoreticalStd.dup().getColumns(new int[] {0});

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fitLabel(true);
    myNormalizer.fit(sampleDataSet);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray labelDelta = Transforms.abs(theoreticallabelMean.sub(myNormalizer.getLabelMean()));
    INDArray meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    INDArray labelDeltaPerc = labelDelta.div(theoreticallabelMean).mul(100);
    double maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);
    assertTrue(labelDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);

    INDArray stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    INDArray stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    INDArray stdlabelDeltaPerc =
                    Transforms.abs(theoreticallabelStd.sub(myNormalizer.getLabelStd())).div(theoreticallabelStd).mul(100);
    double maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
    double maxlabelStdDeltaPerc = stdlabelDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
    assertTrue(maxlabelStdDeltaPerc < tolerancePerc);


    // SAME TEST WITH THE ITERATOR
    int bSize = 10;
    tolerancePerc = 0.1; // 0.1% of correct value
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    myNormalizer.fit(sampleIter);

    meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);

    stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
}
 
Example 18
Source File: NormalizationTests.java    From DataVec with Apache License 2.0
@Test
public void normalizationTests() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));

    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }

    }

    INDArray arr = RecordConverter.toMatrix(data);

    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    assertEquals(schema, DataFrames.fromStructType(DataFrames.fromSchema(schema)));
    assertEquals(rdd.collect(), DataFrames.toRecords(DataFrames.toDataFrame(schema, rdd)).getSecond().collect());

    DataRowsFacade dataFrame = DataFrames.toDataFrame(schema, rdd);
    dataFrame.get().show();
    Normalization.zeromeanUnitVariance(dataFrame).get().show();
    Normalization.normalize(dataFrame).get().show();

    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));

    INDArray zeroMeanUnitVarianceDataFrame =
                    RecordConverter.toMatrix(Normalization.zeromeanUnitVariance(schema, rdd).collect());
    INDArray zeroMeanUnitVarianceDataFrameZeroToOne =
                    RecordConverter.toMatrix(Normalization.normalize(schema, rdd).collect());
    assertEquals(standardScalered, zeroMeanUnitVarianceDataFrame);
    assertTrue(zeroToOnes.equalsWithEps(zeroMeanUnitVarianceDataFrameZeroToOne, 1e-1));

}
 
Example 19
Source File: NormalizerStandardizeTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testTransform() {
    /* A random dataset is generated as A*X + B, where X is drawn from a normal
       distribution with mean 0 and std 1, so the dataset's mean is B and its std is A.
       The obtained mean and std dev are compared to these theoretical values.
       The transformed values should match X generated with the same seed.
     */
    long randSeed = 12345;

    int nFeatures = 2;
    int nSamples = 6400;
    int bsize = 8;
    int a = 5;
    int b = 100;
    INDArray sampleMean, sampleStd, sampleMeanDelta, sampleStdDelta, delta, deltaPerc;
    double maxDeltaPerc, sampleMeanSEM;

    genRandomDataSet normData = new genRandomDataSet(nSamples, nFeatures, a, b, randSeed);
    DataSet genRandExpected = normData.theoreticalTransform;
    genRandomDataSet expectedData = new genRandomDataSet(nSamples, nFeatures, 1, 0, randSeed);
    genRandomDataSet beforeTransformData = new genRandomDataSet(nSamples, nFeatures, a, b, randSeed);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    DataSetIterator normIterator = normData.getIter(bsize);
    DataSetIterator genRandExpectedIter = new TestDataSetIterator(genRandExpected, bsize);
    DataSetIterator expectedIterator = expectedData.getIter(bsize);
    DataSetIterator beforeTransformIterator = beforeTransformData.getIter(bsize);

    myNormalizer.fit(normIterator);

    double tolerancePerc = 0.10; //within 0.1%
    sampleMean = myNormalizer.getMean();
    sampleMeanDelta = Transforms.abs(sampleMean.sub(normData.theoreticalMean));
    assertTrue(sampleMeanDelta.mul(100).div(normData.theoreticalMean).max().getDouble(0) < tolerancePerc);
    //sanity check to see if it's within the theoretical standard error of mean
    sampleMeanSEM = sampleMeanDelta.div(normData.theoreticalSEM).max().getDouble(0);
    assertTrue(sampleMeanSEM < 2.6); //99% of the time it should be within this many SEMs

    tolerancePerc = 1; //within 1% - std dev value
    sampleStd = myNormalizer.getStd();
    sampleStdDelta = Transforms.abs(sampleStd.sub(normData.theoreticalStd));

    double actualmaxDiff = sampleStdDelta.div(normData.theoreticalStd).max().mul(100).getDouble(0);
    assertTrue(actualmaxDiff < tolerancePerc);

    tolerancePerc = 1; //within 1%
    normIterator.setPreProcessor(myNormalizer);
    while (normIterator.hasNext()) {
        INDArray before = beforeTransformIterator.next().getFeatures();
        INDArray origBefore = genRandExpectedIter.next().getFeatures();
        INDArray after = normIterator.next().getFeatures();
        INDArray expected = expectedIterator.next().getFeatures();
        delta = Transforms.abs(after.sub(expected));
        deltaPerc = delta.div(Transforms.abs(before.sub(expected)));
        deltaPerc.muli(100);
        maxDeltaPerc = deltaPerc.max(0, 1).getDouble(0);
        /*
        System.out.println("=== BEFORE ===");
        System.out.println(before);
        System.out.println("=== ORIG BEFORE ===");
        System.out.println(origBefore);
        System.out.println("=== AFTER ===");
        System.out.println(after);
        System.out.println("=== SHOULD BE ===");
        System.out.println(expected);
        System.out.println("% diff, "+ maxDeltaPerc);
        */
        assertTrue(maxDeltaPerc < tolerancePerc);
    }
}
 
Example 20
Source File: PreProcessor3D4DTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testBruteForce3dMaskLabels() {

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fitLabel(true);
    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();
    myMinMaxScaler.fitLabel(true);

    //generating a dataset with consecutive numbers as feature values. Dataset also has masks
    int samples = 100;
    INDArray featureScale = Nd4j.create(new double[] {1, 2, 10}).reshape(3, 1);
    int timeStepsU = 5;
    Construct3dDataSet sampleU = new Construct3dDataSet(featureScale, timeStepsU, samples, 1);
    int timeStepsV = 3;
    Construct3dDataSet sampleV = new Construct3dDataSet(featureScale, timeStepsV, samples, sampleU.newOrigin);
    List<DataSet> dataSetList = new ArrayList<DataSet>();
    dataSetList.add(sampleU.sampleDataSet);
    dataSetList.add(sampleV.sampleDataSet);

    DataSet fullDataSetA = DataSet.merge(dataSetList);
    DataSet fullDataSetAA = fullDataSetA.copy();
    //This should be the same dataset as above, without a mask
    Construct3dDataSet fullDataSetNoMask =
                    new Construct3dDataSet(featureScale, timeStepsU + timeStepsV, samples, 1);

    //preprocessors - label and feature values are the same
    myNormalizer.fit(fullDataSetA);
    assertEquals(myNormalizer.getMean().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMean.castTo(DataType.FLOAT));
    assertEquals(myNormalizer.getStd().castTo(DataType.FLOAT), fullDataSetNoMask.expectedStd.castTo(DataType.FLOAT));
    assertEquals(myNormalizer.getLabelMean().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMean.castTo(DataType.FLOAT));
    assertEquals(myNormalizer.getLabelStd().castTo(DataType.FLOAT), fullDataSetNoMask.expectedStd.castTo(DataType.FLOAT));

    myMinMaxScaler.fit(fullDataSetAA);
    assertEquals(myMinMaxScaler.getMin().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMin.castTo(DataType.FLOAT));
    assertEquals(myMinMaxScaler.getMax().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMax.castTo(DataType.FLOAT));
    assertEquals(myMinMaxScaler.getLabelMin().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMin.castTo(DataType.FLOAT));
    assertEquals(myMinMaxScaler.getLabelMax().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMax.castTo(DataType.FLOAT));


    //Same Test with an Iterator, values should be close for std, exact for everything else
    DataSetIterator sampleIterA = new TestDataSetIterator(fullDataSetA, 5);
    DataSetIterator sampleIterB = new TestDataSetIterator(fullDataSetAA, 5);

    myNormalizer.fit(sampleIterA);
    assertEquals(myNormalizer.getMean().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMean.castTo(DataType.FLOAT));
    assertEquals(myNormalizer.getLabelMean().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMean.castTo(DataType.FLOAT));
    double diff1 = Transforms.abs(myNormalizer.getStd().div(fullDataSetNoMask.expectedStd).sub(1)).maxNumber().doubleValue();
    double diff2 = Transforms.abs(myNormalizer.getLabelStd().div(fullDataSetNoMask.expectedStd).sub(1)).maxNumber().doubleValue();
    assertTrue(diff1 < 0.01);
    assertTrue(diff2 < 0.01);

    myMinMaxScaler.fit(sampleIterB);
    assertEquals(myMinMaxScaler.getMin().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMin.castTo(DataType.FLOAT));
    assertEquals(myMinMaxScaler.getMax().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMax.castTo(DataType.FLOAT));
    assertEquals(myMinMaxScaler.getLabelMin().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMin.castTo(DataType.FLOAT));
    assertEquals(myMinMaxScaler.getLabelMax().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMax.castTo(DataType.FLOAT));
}