org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize Java Examples

The following examples show how to use org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IrisFileDataSource.java    From FederatedAndroidTrainer with MIT License 7 votes vote down vote up
/**
 * Loads the iris CSV from {@code dataFile}, splits it 80/20 into {@code trainingData} and
 * {@code testData}, and standardizes both splits with statistics fitted on the training split.
 *
 * @throws IOException declared for the reader/iterator calls made while loading the data
 * @throws InterruptedException declared for the reader/iterator calls made while loading the data
 */
private void createDataSource() throws IOException, InterruptedException {
    // Step 1: parse the CSV. No leading lines are skipped and fields are comma-separated.
    final RecordReader csvReader = new CSVRecordReader(0, ",");
    csvReader.initialize(new InputStreamInputSplit(dataFile));

    // Step 2: turn records into DataSet objects. Each row has 4 input features followed by an
    // integer class index in column 4; there are 3 classes (0, 1 or 2).
    final int labelColumn = 4;
    final int classCount = 3;
    final DataSetIterator batches =
            new RecordReaderDataSetIterator(csvReader, batchSize, labelColumn, classCount);

    // Only the first batch is taken, so batchSize decides how many rows end up in the data.
    final DataSet everything = batches.next();
    everything.shuffle();

    // 80% of the rows go to training, the remainder to testing.
    final SplitTestAndTrain split = everything.splitTestAndTrain(0.80);
    trainingData = split.getTrain();
    testData = split.getTest();

    // Standardize: statistics are collected from the training split only and then applied to
    // both splits, so the test split is scaled with the *training* statistics.
    final DataNormalization standardizer = new NormalizerStandardize();
    standardizer.fit(trainingData);
    standardizer.transform(trainingData);
    standardizer.transform(testData);
}
 
Example #2
Source File: PreProcessor3D4DTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Fits a standardizer and a min-max scaler on a constructed 4d data set and compares the fitted
 * statistics with the expected values carried by that data set.
 */
@Test
public void testBruteForce4d() {
    final Construct4dDataSet images = new Construct4dDataSet(10, 5, 10, 15);

    // Standardizer: mean is compared for equality, std only by relative deviation.
    final NormalizerStandardize standardizer = new NormalizerStandardize();
    standardizer.fit(images.sampleDataSet);
    assertEquals(images.expectedMean, standardizer.getMean());

    // Largest |fittedStd / expectedStd - 1| over all entries.
    final float worstRelativeStdError =
            Transforms.abs(standardizer.getStd().div(images.expectedStd).sub(1)).maxNumber().floatValue();
    final float largestFittedStd = standardizer.getStd().maxNumber().floatValue();
    final float largestExpectedStd = images.expectedStd.maxNumber().floatValue();
    System.out.println("ValA: " + worstRelativeStdError);
    System.out.println("ValB: " + largestFittedStd);
    System.out.println("ValC: " + largestExpectedStd);
    assertTrue(worstRelativeStdError < 0.05);

    // Min-max scaler: both bounds are compared for equality.
    final NormalizerMinMaxScaler minMax = new NormalizerMinMaxScaler();
    minMax.fit(images.sampleDataSet);
    assertEquals(images.expectedMin, minMax.getMin());
    assertEquals(images.expectedMax, minMax.getMax());

    // Finally, transforming a copy of the data only has to complete without throwing.
    final DataSet scratch = images.sampleDataSet.copy();
    standardizer.transform(scratch);
}
 
Example #3
Source File: PreProcessor3D4DTest.java    From nd4j with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the statistics fitted by a standardizer and a min-max scaler on a constructed 4d data
 * set against the expected values that the data set exposes.
 */
@Test
public void testBruteForce4d() {
    final Construct4dDataSet fixture = new Construct4dDataSet(10, 5, 10, 15);

    // Mean must be equal to the expected mean; std is checked via its relative deviation.
    final NormalizerStandardize stdNormalizer = new NormalizerStandardize();
    stdNormalizer.fit(fixture.sampleDataSet);
    assertEquals(fixture.expectedMean, stdNormalizer.getMean());

    // Maximum of |fittedStd / expectedStd - 1| across all entries.
    final float maxStdDeviation =
            Transforms.abs(stdNormalizer.getStd().div(fixture.expectedStd).sub(1)).maxNumber().floatValue();
    final float maxFittedStd = stdNormalizer.getStd().maxNumber().floatValue();
    final float maxExpectedStd = fixture.expectedStd.maxNumber().floatValue();
    System.out.println("ValA: " + maxStdDeviation);
    System.out.println("ValB: " + maxFittedStd);
    System.out.println("ValC: " + maxExpectedStd);
    assertTrue(maxStdDeviation < 0.05);

    // Fitted min and max must be equal to the expected bounds.
    final NormalizerMinMaxScaler rangeScaler = new NormalizerMinMaxScaler();
    rangeScaler.fit(fixture.sampleDataSet);
    assertEquals(fixture.expectedMin, rangeScaler.getMin());
    assertEquals(fixture.expectedMax, rangeScaler.getMax());

    // Smoke check: transforming a copy must not throw.
    final DataSet workingCopy = fixture.sampleDataSet.copy();
    stdNormalizer.transform(workingCopy);
}
 
Example #4
Source File: NormalizerStandardizeTest.java    From nd4j with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@code revert} undoes {@code transform}: after a round trip the features must
 * match the originals to within 0.01%.
 */
@Test
public void testRevert() {
    final int sampleCount = 500;
    final int featureCount = 3;
    final double maxAllowedPercent = 0.01; // 0.01% of the original value

    final INDArray features = Nd4j.randn(sampleCount, featureCount);
    final INDArray labels = Nd4j.zeros(sampleCount, 1);
    final DataSet original = new DataSet(features, labels);

    final NormalizerStandardize standardizer = new NormalizerStandardize();
    standardizer.fit(original);

    // Round-trip a copy so the original features stay available for the comparison.
    final DataSet roundTripped = original.copy();
    standardizer.transform(roundTripped);
    standardizer.revert(roundTripped);

    // NOTE(review): the absolute error is divided by the signed original features. Wherever a
    // feature is negative the quotient is negative and cannot win the max below, so those
    // entries are effectively unchecked - confirm whether that is intended.
    final INDArray absoluteError =
            Transforms.abs(roundTripped.getFeatures().sub(original.getFeatures()));
    final INDArray relativeError = absoluteError.div(original.getFeatures());
    final double worstPercent = relativeError.max(0, 1).mul(100).getDouble(0, 0);
    assertTrue(worstPercent < maxAllowedPercent);
}
 
Example #5
Source File: ModelSerializerTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes an initialized {@code ComputationGraph} with {@code SerializationUtils} and checks
 * that the deserialized graph equals the original.
 */
@Test
public void testJavaSerde_1() throws Exception {
    final int inputSize = 5;
    final int outputSize = 6;

    // Single output layer wired directly to the "in" input.
    final ComputationGraphConfiguration graphConf = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .l1(0.01)
            .graphBuilder()
            .addInputs("in")
            .layer("0", new OutputLayer.Builder().nIn(inputSize).nOut(outputSize).build(), "in")
            .setOutputs("0")
            .validateOutputLayerConfig(false)
            .build();

    final ComputationGraph original = new ComputationGraph(graphConf);
    original.init();

    // A normalizer is fitted here, but it does not take part in the serialization below.
    final DataSet data = trivialDataSet();
    final NormalizerStandardize standardizer = new NormalizerStandardize();
    standardizer.fit(data);

    val serialized = SerializationUtils.serialize(original);
    final ComputationGraph roundTripped = SerializationUtils.deserialize(serialized);

    assertEquals(original, roundTripped);
}
 
Example #6
Source File: ModelSerializerTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes an initialized {@code MultiLayerNetwork} with {@code SerializationUtils} and checks
 * that the deserialized network equals the original.
 */
@Test
public void testJavaSerde_2() throws Exception {
    final int inputSize = 5;
    final int outputSize = 6;

    // One softmax output layer at index 0.
    final MultiLayerConfiguration netConf = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .l1(0.01)
            .list()
            .layer(0, new OutputLayer.Builder()
                    .nIn(inputSize)
                    .nOut(outputSize)
                    .activation(Activation.SOFTMAX)
                    .build())
            .build();

    final MultiLayerNetwork original = new MultiLayerNetwork(netConf);
    original.init();

    // A normalizer is fitted here, but it does not take part in the serialization below.
    final DataSet data = trivialDataSet();
    final NormalizerStandardize standardizer = new NormalizerStandardize();
    standardizer.fit(data);

    val serialized = SerializationUtils.serialize(original);
    final MultiLayerNetwork roundTripped = SerializationUtils.deserialize(serialized);

    assertEquals(original, roundTripped);
}
 
Example #7
Source File: RPTreeTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an {@code RPTree} from one standardized MNIST batch and checks its leaf count, root
 * depth, candidate lookup and query result size.
 */
@Test
public void testRPTree() throws Exception {
    final DataSetIterator mnistIter = new MnistDataSetIterator(150,150);
    final RPTree tree = new RPTree(784,50);

    // Fit on the whole batch, then standardize its feature array.
    final DataSet batch = mnistIter.next();
    final NormalizerStandardize standardizer = new NormalizerStandardize();
    standardizer.fit(batch);
    standardizer.transform(batch.getFeatures());

    final INDArray points = batch.getFeatures();
    tree.buildTree(points);
    assertEquals(4, tree.getLeaves().size());
    assertEquals(0, tree.getRoot().getDepth());

    // Looking up the first row must yield candidates, and a query for 10 must return 10.
    final List<Integer> firstRowCandidates = tree.getCandidates(points.getRow(0));
    assertFalse(firstRowCandidates.isEmpty());
    assertEquals(10, tree.query(points.slice(0),10).length());
    System.out.println(firstRowCandidates.size());

    // Adding one more node after the build only has to complete without throwing.
    tree.addNodeAtIndex(150, points.getRow(0));
}
 
Example #8
Source File: DiabetesFileDataSource.java    From FederatedAndroidTrainer with MIT License 6 votes vote down vote up
/**
 * Loads the CSV from {@code dataFile}, splits it 80/20 into {@code trainingData} and
 * {@code testData}, and standardizes both splits with statistics fitted on the training split.
 *
 * @throws IOException declared for the reader/iterator calls made while loading the data
 * @throws InterruptedException declared for the reader/iterator calls made while loading the data
 */
private void createDataSource() throws IOException, InterruptedException {
    // Step 1: parse the CSV. No leading lines are skipped and fields are comma-separated.
    final RecordReader csvReader = new CSVRecordReader(0, ",");
    csvReader.initialize(new InputStreamInputSplit(dataFile));

    // Step 2: turn records into DataSet objects. Column 11 is passed as both label bounds and
    // the last argument is true (presumably regression mode - confirm against the DL4J API).
    final int labelColumn = 11;
    final DataSetIterator batches =
            new RecordReaderDataSetIterator(csvReader, batchSize, labelColumn, labelColumn, true);

    // Only the first batch is taken, so batchSize decides how many rows end up in the data.
    final DataSet everything = batches.next();

    // 80% of the rows go to training, the remainder to testing.
    final SplitTestAndTrain split = everything.splitTestAndTrain(0.80);
    trainingData = split.getTrain();
    testData = split.getTest();

    // Standardize: statistics are collected from the training split only and then applied to
    // both splits, so the test split is scaled with the *training* statistics.
    final DataNormalization standardizer = new NormalizerStandardize();
    standardizer.fit(trainingData);
    standardizer.transform(trainingData);
    standardizer.transform(testData);
}
 
Example #9
Source File: NormalizerStandardizeTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@code revert} undoes {@code transform}: after a round trip the features must
 * match the originals to within 0.01%.
 */
@Test
public void testRevert() {
    final int sampleCount = 500;
    final int featureCount = 3;
    final double maxAllowedPercent = 0.01; // 0.01% of the original value

    final INDArray features = Nd4j.randn(sampleCount, featureCount);
    final INDArray labels = Nd4j.zeros(sampleCount, 1);
    final DataSet original = new DataSet(features, labels);

    final NormalizerStandardize standardizer = new NormalizerStandardize();
    standardizer.fit(original);

    // Round-trip a copy so the original features stay available for the comparison.
    final DataSet roundTripped = original.copy();
    standardizer.transform(roundTripped);
    standardizer.revert(roundTripped);

    // NOTE(review): the absolute error is divided by the signed original features. Wherever a
    // feature is negative the quotient is negative and cannot win the max below, so those
    // entries are effectively unchecked - confirm whether that is intended.
    final INDArray absoluteError =
            Transforms.abs(roundTripped.getFeatures().sub(original.getFeatures()));
    final INDArray relativeError = absoluteError.div(original.getFeatures());
    final double worstPercent = relativeError.max(0, 1).mul(100).getDouble(0);
    assertTrue(worstPercent < maxAllowedPercent);
}
 
Example #10
Source File: StandardizeSerializerStrategy.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds a {@code NormalizerStandardize} from a binary stream.
 *
 * <p>Reads a boolean label flag, then two arrays passed to the normalizer constructor, and -
 * only when the flag is set - two more arrays passed to {@code setLabelStats}.
 *
 * @param stream source of the serialized normalizer; it is wrapped but not closed here
 * @return the restored normalizer
 * @throws IOException if reading from the stream fails
 */
@Override
public NormalizerStandardize restore(@NonNull InputStream stream) throws IOException {
    final DataInputStream in = new DataInputStream(stream);

    final boolean hasLabelStats = in.readBoolean();

    // Java evaluates arguments left to right, so the two reads happen in stream order.
    final NormalizerStandardize restored = new NormalizerStandardize(Nd4j.read(in), Nd4j.read(in));
    restored.fitLabel(hasLabelStats);
    if (hasLabelStats) {
        restored.setLabelStats(Nd4j.read(in), Nd4j.read(in));
    }

    return restored;
}
 
Example #11
Source File: DataSet.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Standardizes this data set using a {@code NormalizerStandardize} that is fitted on this same
 * data set immediately beforehand.
 */
@Override
public void normalize() {
    final NormalizerStandardize standardizer = new NormalizerStandardize();
    // Fit first to collect the statistics, then apply them to this very instance.
    standardizer.fit(this);
    standardizer.transform(this);
}
 
Example #12
Source File: StandardizeSerializerStrategy.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a {@code NormalizerStandardize} to a binary stream.
 *
 * <p>Layout: the fit-label flag, the mean array, the std array and - only when the flag is
 * set - the label mean and label std arrays.
 *
 * @param normalizer the normalizer to serialize
 * @param stream destination; it is closed when this method returns because the wrapping
 *        {@code DataOutputStream} is managed by try-with-resources
 * @throws IOException if writing to the stream fails
 */
@Override
public void write(@NonNull NormalizerStandardize normalizer, @NonNull OutputStream stream) throws IOException {
    try (DataOutputStream out = new DataOutputStream(stream)) {
        // The flag goes first so a reader knows whether label statistics follow.
        out.writeBoolean(normalizer.isFitLabel());

        Nd4j.write(normalizer.getMean(), out);
        Nd4j.write(normalizer.getStd(), out);

        if (normalizer.isFitLabel()) {
            Nd4j.write(normalizer.getLabelMean(), out);
            Nd4j.write(normalizer.getLabelStd(), out);
        }

        out.flush();
    }
}
 
Example #13
Source File: NormalizerStandardizeTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that fitting, transforming and reverting a data set whose features are all the same
 * constant never produces NaN, even though the standard deviation of such data is zero.
 */
@Test
public void testConstant() {
    double tolerancePerc = 10.0; // 10% of correct value
    int nSamples = 500;
    int nFeatures = 3;
    int constant = 100;
    // Absolute tolerance shared by both closeness checks below.
    double toleranceAbs = constant * tolerancePerc / 100.0;

    INDArray featureSet = Nd4j.zeros(nSamples, nFeatures).add(constant);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    // The fitted std must not be NaN
    assertFalse(Double.isNaN(myNormalizer.getStd().getDouble(0)));

    myNormalizer.transform(sampleDataSet);
    // Transforming must not yield NaN even though the std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    // Every transformed value must be close to zero (expected value first, per JUnit convention)
    assertEquals(0.0, Transforms.abs(sampleDataSet.getFeatures()).max(0, 1).getDouble(0), toleranceAbs);

    myNormalizer.revert(sampleDataSet);
    // Reverting must not yield NaN either
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    // Use the LARGEST deviation so that every reverted value is checked, not just the best one.
    // NOTE(review): featureSet is the same array instance that was handed to the DataSet; if
    // DataSet keeps that reference, this difference is trivially zero - TODO confirm and
    // compare against an untouched copy if so.
    assertEquals(0.0, Transforms.abs(sampleDataSet.getFeatures().sub(featureSet)).max(0, 1).getDouble(0),
                    toleranceAbs);
}
 
Example #14
Source File: NormalizerStandardizeTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the fitted mean stays accurate for nearly constant features with a very large
 * magnitude, i.e. that the statistics computation neither overflows nor loses the mean.
 */
@Test
public void testUnderOverflow() {
    // This dataset will be basically constant with a small std deviation
    // And the constant is large. Checking if algorithm can handle
    double tolerancePerc = 1; //Within 1 %
    int nSamples = 1000;
    int bSize = 10;
    int x = -1000000, y = 1000000;
    double z = 1000000;

    INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
    INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
    INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);

    INDArray theoreticalMean = Nd4j.create(new float[] {x, y, (float) z}).castTo(Nd4j.defaultFloatingPointType()).reshape(1, -1);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleIter);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    // theoreticalMean contains a negative entry (x), so the raw ratio would be negative for that
    // column and could never exceed the tolerance. Taking the absolute value of the ratio makes
    // the check meaningful for every column.
    INDArray meanDeltaPerc = Transforms.abs(meanDelta.mul(100).div(theoreticalMean));
    assertTrue(meanDeltaPerc.max(1).getDouble(0) < tolerancePerc);

    //this just has to not barf
    //myNormalizer.transform(sampleIter);
    myNormalizer.transform(sampleDataSet);
}
 
Example #15
Source File: StandardizeSerializerStrategy.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a {@code NormalizerStandardize} back from a binary stream.
 *
 * <p>The stream is expected to start with a boolean label flag, followed by the two arrays
 * given to the normalizer constructor and, if the flag is true, the two arrays given to
 * {@code setLabelStats}.
 *
 * @param stream source of the serialized normalizer; it is wrapped but not closed here
 * @return the restored normalizer
 * @throws IOException if reading from the stream fails
 */
@Override
public NormalizerStandardize restore(@NonNull InputStream stream) throws IOException {
    final DataInputStream input = new DataInputStream(stream);

    final boolean labelsFitted = input.readBoolean();

    // Argument evaluation is left to right, so both reads consume the stream in order.
    final NormalizerStandardize normalizer =
            new NormalizerStandardize(Nd4j.read(input), Nd4j.read(input));
    normalizer.fitLabel(labelsFitted);
    if (labelsFitted) {
        normalizer.setLabelStats(Nd4j.read(input), Nd4j.read(input));
    }

    return normalizer;
}
 
Example #16
Source File: PreProcessor3D4DTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Fits a standardizer and a min-max scaler on constructed 3d data sets, first directly and then
 * through an iterator, and compares the fitted statistics with the expected values.
 */
@Test
public void testBruteForce3d() {

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();

    int timeSteps = 15;
    int samples = 100;
    //multiplier for the features
    INDArray featureScaleA = Nd4j.create(new double[] {1, -2, 3}).reshape(3,1);
    INDArray featureScaleB = Nd4j.create(new double[] {2, 2, 3}).reshape(3,1);

    Construct3dDataSet caseA = new Construct3dDataSet(featureScaleA, timeSteps, samples, 1);
    Construct3dDataSet caseB = new Construct3dDataSet(featureScaleB, timeSteps, samples, 1);

    myNormalizer.fit(caseA.sampleDataSet);
    assertEquals(caseA.expectedMean.castTo(DataType.FLOAT), myNormalizer.getMean().castTo(DataType.FLOAT));
    assertTrue(Transforms.abs(myNormalizer.getStd().div(caseA.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);

    myMinMaxScaler.fit(caseB.sampleDataSet);
    assertEquals(caseB.expectedMin.castTo(DataType.FLOAT), myMinMaxScaler.getMin().castTo(DataType.FLOAT));
    assertEquals(caseB.expectedMax.castTo(DataType.FLOAT), myMinMaxScaler.getMax().castTo(DataType.FLOAT));

    //Same Test with an Iterator, values should be close for std, exact for everything else
    DataSetIterator sampleIterA = new TestDataSetIterator(caseA.sampleDataSet, 5);
    DataSetIterator sampleIterB = new TestDataSetIterator(caseB.sampleDataSet, 5);

    // Expected value goes first (JUnit convention), matching the direct-fit assertions above so
    // that a failure message labels the two sides correctly.
    myNormalizer.fit(sampleIterA);
    assertEquals(caseA.expectedMean.castTo(DataType.FLOAT), myNormalizer.getMean().castTo(DataType.FLOAT));
    assertTrue(Transforms.abs(myNormalizer.getStd().div(caseA.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);

    myMinMaxScaler.fit(sampleIterB);
    assertEquals(caseB.expectedMin.castTo(DataType.FLOAT), myMinMaxScaler.getMin().castTo(DataType.FLOAT));
    assertEquals(caseB.expectedMax.castTo(DataType.FLOAT), myMinMaxScaler.getMax().castTo(DataType.FLOAT));

}
 
Example #17
Source File: StandardizeSerializerStrategy.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes a {@code NormalizerStandardize} to a binary stream.
 *
 * <p>Written in order: the fit-label flag, the mean array, the std array, and - when the flag
 * is true - the label mean and label std arrays.
 *
 * @param normalizer the normalizer to serialize
 * @param stream destination; the wrapping {@code DataOutputStream} is opened in
 *        try-with-resources, so the stream is closed when this method returns
 * @throws IOException if writing to the stream fails
 */
@Override
public void write(@NonNull NormalizerStandardize normalizer, @NonNull OutputStream stream) throws IOException {
    try (DataOutputStream sink = new DataOutputStream(stream)) {
        // Leading flag tells a reader whether label statistics are present.
        sink.writeBoolean(normalizer.isFitLabel());

        Nd4j.write(normalizer.getMean(), sink);
        Nd4j.write(normalizer.getStd(), sink);

        if (normalizer.isFitLabel()) {
            Nd4j.write(normalizer.getLabelMean(), sink);
            Nd4j.write(normalizer.getLabelStd(), sink);
        }

        sink.flush();
    }
}
 
Example #18
Source File: NormalizerTests.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Test fixture: seeds the ND4J random generator, creates a random data set with 10 feature
 * columns and 3 label columns, and instantiates fresh normalizers.
 */
@Before
public void randomData() {
    // A fixed seed keeps the generated data reproducible.
    Nd4j.getRandom().setSeed(12345);

    batchSize = 13;
    batchCount = 20;
    // The row count below is batchCount full batches plus one half-sized trailing batch.
    lastBatch = batchSize / 2;

    final INDArray features = Nd4j.rand(batchCount * batchSize + lastBatch, 10);
    final INDArray labels = Nd4j.rand(batchCount * batchSize + lastBatch, 3);
    data = new DataSet(features, labels);

    stdScaler = new NormalizerStandardize();
    minMaxScaler = new NormalizerMinMaxScaler();
}
 
Example #19
Source File: NormalizerStandardizeTest.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that fitting, transforming and reverting a data set whose features are all the same
 * constant never produces NaN, even though the standard deviation of such data is zero.
 */
@Test
public void testConstant() {
    double tolerancePerc = 10.0; // 10% of correct value
    int nSamples = 500;
    int nFeatures = 3;
    int constant = 100;
    // Absolute tolerance shared by both closeness checks below.
    double toleranceAbs = constant * tolerancePerc / 100.0;

    INDArray featureSet = Nd4j.zeros(nSamples, nFeatures).add(constant);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    // The fitted std must not be NaN
    assertFalse(Double.isNaN(myNormalizer.getStd().getDouble(0)));

    myNormalizer.transform(sampleDataSet);
    // Transforming must not yield NaN even though the std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    // Every transformed value must be close to zero (expected value first, per JUnit convention)
    assertEquals(0.0, Transforms.abs(sampleDataSet.getFeatures()).max(0, 1).getDouble(0, 0), toleranceAbs);

    myNormalizer.revert(sampleDataSet);
    // Reverting must not yield NaN either
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    // Use the LARGEST deviation so that every reverted value is checked, not just the best one.
    // NOTE(review): featureSet is the same array instance that was handed to the DataSet; if
    // DataSet keeps that reference, this difference is trivially zero - TODO confirm and
    // compare against an untouched copy if so.
    assertEquals(0.0, Transforms.abs(sampleDataSet.getFeatures().sub(featureSet)).max(0, 1).getDouble(0),
                    toleranceAbs);
}
 
Example #20
Source File: NormalizerStandardizeTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the fitted mean and std of the values 1..6 are the same whether the normalizer is
 * fitted on the whole data set or through iterators with different batch sizes.
 */
@Test
public void testDifferentBatchSizes() {
    // 6x1 matrix holding 1 through 6, used as both features and labels.
    final INDArray oneToSix = Nd4j.linspace(1, 6, 6, DataType.DOUBLE).reshape(1, -1).transpose();
    final DataSet wholeSet = new DataSet(oneToSix, oneToSix);

    // Baseline: fit directly on the complete DataSet.
    final NormalizerStandardize direct = new NormalizerStandardize();
    direct.fit(wholeSet);
    assertEquals(3.5f, direct.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, direct.getStd().getFloat(0), 1e-4);

    // Iterator fits, in the original order:
    //   3 -> two equal batches of 3 rows
    //   4 -> one batch of 4 rows and one of 2 rows
    //   1 -> six single-row batches
    for (int rowsPerBatch : new int[] {3, 4, 1}) {
        final DataSetIterator batches = new TestDataSetIterator(wholeSet, rowsPerBatch);
        final NormalizerStandardize viaIterator = new NormalizerStandardize();
        viaIterator.fit(batches);
        assertEquals(3.5f, viaIterator.getMean().getFloat(0), 1e-6);
        assertEquals(1.70783f, viaIterator.getStd().getFloat(0), 1e-4);
    }
}
 
Example #21
Source File: DataSetIteratorHelper.java    From Java-Deep-Learning-Cookbook with MIT License 5 votes vote down vote up
/**
 * Builds a classification iterator over {@code Churn_Modelling.csv}, attaches a standardizer
 * fitted on that iterator as its pre-processor, and wraps it in a train/test splitter.
 *
 * @return the splitter created from the normalized iterator
 * @throws IOException declared for the file and reader calls made here
 * @throws InterruptedException declared for the reader calls made here
 */
private static DataSetIteratorSplitter createDataSetSplitter() throws IOException, InterruptedException {
    final RecordReader reader =
            DataSetIteratorHelper.generateReader(new ClassPathResource("Churn_Modelling.csv").getFile());
    final DataSetIterator batches = new RecordReaderDataSetIterator.Builder(reader, batchSize)
            .classification(labelIndex, numClasses)
            .build();

    // Fit on the full iterator, then let the iterator normalize every batch it hands out.
    final DataNormalization standardizer = new NormalizerStandardize();
    standardizer.fit(batches);
    batches.setPreProcessor(standardizer);

    // 1250 and 0.8 are presumably the total batch count and the training ratio - confirm
    // against the DataSetIteratorSplitter constructor.
    return new DataSetIteratorSplitter(batches, 1250, 0.8);
}
 
Example #22
Source File: NormalizerStandardizeTest.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the fitted mean stays accurate for nearly constant features with a very large
 * magnitude, i.e. that the statistics computation neither overflows nor loses the mean.
 */
@Test
public void testUnderOverflow() {
    // This dataset will be basically constant with a small std deviation
    // And the constant is large. Checking if algorithm can handle
    double tolerancePerc = 1; //Within 1 %
    int nSamples = 1000;
    int bSize = 10;
    int x = -1000000, y = 1000000;
    double z = 1000000;

    INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
    INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
    INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);

    INDArray theoreticalMean = Nd4j.create(new double[] {x, y, z});

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleIter);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    // theoreticalMean contains a negative entry (x), so the raw ratio would be negative for that
    // column and could never exceed the tolerance. Taking the absolute value of the ratio makes
    // the check meaningful for every column.
    INDArray meanDeltaPerc = Transforms.abs(meanDelta.mul(100).div(theoreticalMean));
    assertTrue(meanDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);

    //this just has to not barf
    //myNormalizer.transform(sampleIter);
    myNormalizer.transform(sampleDataSet);
}
 
Example #23
Source File: NormalizerStandardizeTest.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the fitted mean and std of the values 1..6 are the same whether the normalizer is
 * fitted on the whole data set or through iterators with different batch sizes.
 */
@Test
public void testDifferentBatchSizes() {
    // 6x1 matrix holding 1 through 6, used as both features and labels.
    final INDArray oneToSix = Nd4j.linspace(1, 6, 6).transpose();
    final DataSet wholeSet = new DataSet(oneToSix, oneToSix);

    // Baseline: fit directly on the complete DataSet.
    final NormalizerStandardize direct = new NormalizerStandardize();
    direct.fit(wholeSet);
    assertEquals(3.5f, direct.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, direct.getStd().getFloat(0), 1e-4);

    // Iterator fits, in the original order:
    //   3 -> two equal batches of 3 rows
    //   4 -> one batch of 4 rows and one of 2 rows
    //   1 -> six single-row batches
    for (int rowsPerBatch : new int[] {3, 4, 1}) {
        final DataSetIterator batches = new TestDataSetIterator(wholeSet, rowsPerBatch);
        final NormalizerStandardize viaIterator = new NormalizerStandardize();
        viaIterator.fit(batches);
        assertEquals(3.5f, viaIterator.getMean().getFloat(0), 1e-6);
        assertEquals(1.70783f, viaIterator.getStd().getFloat(0), 1e-4);
    }
}
 
Example #24
Source File: DataSet.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Standardizes this data set: a new {@code NormalizerStandardize} is fitted on this instance
 * and then applied to it.
 */
@Override
public void normalize() {
    final NormalizerStandardize selfFitted = new NormalizerStandardize();
    // The statistics come from this data set itself, so fit must precede transform.
    selfFitted.fit(this);
    selfFitted.transform(this);
}
 
Example #25
Source File: EvaluationToolsTests.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Trains a small two-layer network on one standardized iris batch and renders the multi-class
 * ROC chart to HTML for two different step settings; the test passes if nothing throws.
 */
@Test
    public void testRocMultiToHtml() throws Exception {
        final DataSetIterator irisIter = new IrisDataSetIterator(150, 150);

        // 4 inputs -> 4 tanh units -> 3 softmax outputs trained with MCXENT loss.
        final MultiLayerConfiguration netConf = new NeuralNetConfiguration.Builder()
                        .weightInit(WeightInit.XAVIER)
                        .list()
                        .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build())
                        .layer(1, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX)
                                        .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                        .build();
        final MultiLayerNetwork model = new MultiLayerNetwork(netConf);
        model.init();

        // Standardize the single batch with statistics fitted on that same batch.
        final NormalizerStandardize standardizer = new NormalizerStandardize();
        final DataSet batch = irisIter.next();
        standardizer.fit(batch);
        standardizer.transform(batch);

        // Thirty training passes over the same batch.
        int passesLeft = 30;
        while (passesLeft-- > 0) {
            model.fit(batch);
        }

        final int[] stepSettings = {20, 0};
        for (int steps : stepSettings) {
            final ROCMultiClass roc = new ROCMultiClass(steps);
            irisIter.reset();

            final INDArray features = batch.getFeatures();
            final INDArray labels = batch.getLabels();
            final INDArray predictions = model.output(features);
            roc.eval(labels, predictions);

            // The HTML is not inspected; building it without an exception is the check.
            final String html =
                            EvaluationTools.rocChartToHtml(roc, Arrays.asList("setosa", "versicolor", "virginica"));
        }
    }
 
Example #26
Source File: PreProcessor3D4DTest.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Fits a standardizer and a min-max scaler on constructed 3d data sets, first directly and then
 * through an iterator, and compares the fitted statistics with the expected values.
 */
@Test
public void testBruteForce3d() {

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();

    int timeSteps = 15;
    int samples = 100;
    //multiplier for the features
    INDArray featureScaleA = Nd4j.create(new double[] {1, -2, 3}).reshape(3, 1);
    INDArray featureScaleB = Nd4j.create(new double[] {2, 2, 3}).reshape(3, 1);

    Construct3dDataSet caseA = new Construct3dDataSet(featureScaleA, timeSteps, samples, 1);
    Construct3dDataSet caseB = new Construct3dDataSet(featureScaleB, timeSteps, samples, 1);

    myNormalizer.fit(caseA.sampleDataSet);
    assertEquals(caseA.expectedMean, myNormalizer.getMean());
    assertTrue(Transforms.abs(myNormalizer.getStd().div(caseA.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);

    myMinMaxScaler.fit(caseB.sampleDataSet);
    assertEquals(caseB.expectedMin, myMinMaxScaler.getMin());
    assertEquals(caseB.expectedMax, myMinMaxScaler.getMax());

    //Same Test with an Iterator, values should be close for std, exact for everything else
    DataSetIterator sampleIterA = new TestDataSetIterator(caseA.sampleDataSet, 5);
    DataSetIterator sampleIterB = new TestDataSetIterator(caseB.sampleDataSet, 5);

    // Expected value goes first (JUnit convention), matching the direct-fit assertions above so
    // that a failure message labels the two sides correctly.
    myNormalizer.fit(sampleIterA);
    assertEquals(caseA.expectedMean, myNormalizer.getMean());
    assertTrue(Transforms.abs(myNormalizer.getStd().div(caseA.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);

    myMinMaxScaler.fit(sampleIterB);
    assertEquals(caseB.expectedMin, myMinMaxScaler.getMin());
    assertEquals(caseB.expectedMax, myMinMaxScaler.getMax());

}
 
Example #27
Source File: OCNNOutputLayerTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an iris iterator whose batches are standardized by a normalizer fitted on that same
 * iterator.
 *
 * @return the iterator, reset after fitting and with the fitted normalizer as pre-processor
 */
public DataSetIterator getNormalizedIterator() {
    final DataSetIterator irisIter = new IrisDataSetIterator(150,150);

    final NormalizerStandardize standardizer = new NormalizerStandardize();
    standardizer.fit(irisIter);

    // Reset explicitly after fitting so the caller receives the iterator at its start.
    irisIter.reset();
    irisIter.setPreProcessor(standardizer);
    return irisIter;
}
 
Example #28
Source File: CustomerRetentionPredictionApi.java    From Java-Deep-Learning-Cookbook with MIT License 5 votes vote down vote up
/**
 * Runs the stored network over the records in {@code inputFile} and returns its predictions.
 *
 * <p>The input is normalized with the normalizer stored alongside the model. That normalizer is
 * deliberately NOT re-fitted on the input: fitting it here would replace the restored statistics
 * with ones computed from the data being scored, so predictions would be made on features
 * scaled differently from whatever the restored statistics describe.
 *
 * @param inputFile file containing the records to score
 * @param modelFilePath path of the serialized model, which must also contain the normalizer
 * @return the network output for all records of the input
 * @throws IOException declared for the model-restore and reader calls made here
 * @throws InterruptedException declared for the reader calls made here
 * @throws IllegalStateException if no normalizer could be restored from the model file
 */
public static INDArray generateOutput(File inputFile, String modelFilePath) throws IOException, InterruptedException {
    final File modelFile = new File(modelFilePath);
    final MultiLayerNetwork network = ModelSerializer.restoreMultiLayerNetwork(modelFile);
    final RecordReader recordReader = generateReader(inputFile);
    final NormalizerStandardize normalizerStandardize = ModelSerializer.restoreNormalizerFromFile(modelFile);
    // Presumably the restore call yields null when the file holds no normalizer (verify against
    // ModelSerializer); fail with a clear message instead of a later NullPointerException.
    if (normalizerStandardize == null) {
        throw new IllegalStateException("No normalizer found in model file: " + modelFilePath);
    }
    // Batch size 1: each record is normalized and scored on its own.
    final DataSetIterator dataSetIterator = new RecordReaderDataSetIterator.Builder(recordReader,1).build();
    dataSetIterator.setPreProcessor(normalizerStandardize);
    return network.output(dataSetIterator);

}
 
Example #29
Source File: NormalizationTests.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Compares the std and mean fitted by {@code NormalizerStandardize} on an all-ones matrix with
 * the column statistics computed by {@code Normalization.stdDevMeanColumns} on the equivalent
 * data frame.
 */
@Test
public void testMeanStdZeros() {
    final int numColumns = 6;
    final int numRows = 5;

    // One double column per index, named "0" .. "5".
    final Schema.Builder schemaBuilder = new Schema.Builder();
    for (int col = 0; col < numColumns; col++) {
        schemaBuilder.addColumnDouble(String.valueOf(col));
    }

    // Five records, every value being 1.0.
    final List<List<Writable>> rowsOfOnes = new ArrayList<>();
    for (int row = 0; row < numRows; row++) {
        final List<Writable> record = new ArrayList<>(numColumns);
        for (int col = 0; col < numColumns; col++) {
            record.add(new DoubleWritable(1.0));
        }
        rowsOfOnes.add(record);
    }

    final INDArray matrix = RecordConverter.toMatrix(rowsOfOnes);

    final Schema schema = schemaBuilder.build();
    final JavaRDD<List<Writable>> rdd = sc.parallelize(rowsOfOnes);
    final DataRowsFacade dataFrame = DataFrames.toDataFrame(schema, rdd);

    // ND4J side: fit and apply both normalizers on duplicates of the matrix.
    final NormalizerStandardize standardizer = new NormalizerStandardize();
    standardizer.fit(new DataSet(matrix.dup(), matrix.dup()));
    final INDArray standardized = matrix.dup();
    standardizer.transform(new DataSet(standardized, standardized));

    final DataNormalization minMax = new NormalizerMinMaxScaler();
    minMax.fit(new DataSet(matrix.dup(), matrix.dup()));
    final INDArray minMaxScaled = matrix.dup();
    minMax.transform(new DataSet(minMaxScaled, minMaxScaled));

    // Data-frame side: row 0 is compared with the std, row 1 with the mean.
    final List<Row> columnStats = Normalization.stdDevMeanColumns(dataFrame, dataFrame.get().columns());
    final INDArray statsMatrix = DataFrames.toMatrix(columnStats);
    assertTrue(standardizer.getStd().equalsWithEps(statsMatrix.getRow(0), 1e-1));
    assertTrue(standardizer.getMean().equalsWithEps(statsMatrix.getRow(1), 1e-1));
}
 
Example #30
Source File: HyperParameterTuning.java    From Java-Deep-Learning-Cookbook with MIT License 5 votes vote down vote up
/**
 * Attaches a standardizer fitted on {@code iterator} as its pre-processor and wraps the
 * iterator in a train/test splitter.
 *
 * @param iterator the data to normalize and split; its pre-processor is replaced
 * @return the splitter created from the normalized iterator
 * @throws IOException declared by the signature; no call visible here documents when it occurs
 * @throws InterruptedException declared by the signature; no call visible here documents when
 *         it occurs
 */
public DataSetIteratorSplitter dataSplit(DataSetIterator iterator) throws IOException, InterruptedException {
    final DataNormalization standardizer = new NormalizerStandardize();
    standardizer.fit(iterator);
    iterator.setPreProcessor(standardizer);

    // 1000 and 0.8 are presumably the total batch count and the training ratio - confirm
    // against the DataSetIteratorSplitter constructor.
    return new DataSetIteratorSplitter(iterator, 1000, 0.8);
}