Java Code Examples for org.nd4j.linalg.dataset.api.iterator.DataSetIterator#setPreProcessor()

The following examples show how to use org.nd4j.linalg.dataset.api.iterator.DataSetIterator#setPreProcessor(). Each example comes from an open-source project; the source file, project, and license are noted above each listing. The common thread is a two-step idiom: fit a normalizer on the data, then attach it to the iterator with setPreProcessor() so that every DataSet returned by next() is preprocessed on the fly.
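Before the individual examples, here is that pattern in miniature. This is a minimal sketch rather than code from any one of the projects below, assuming DL4J's IrisDataSetIterator and NormalizerStandardize (both of which appear in the examples that follow):

import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize;

public class SetPreProcessorSketch {
    public static void main(String[] args) {
        DataSetIterator iter = new IrisDataSetIterator(150, 150);

        // First pass over the data: collect per-feature mean/std statistics
        NormalizerStandardize normalizer = new NormalizerStandardize();
        normalizer.fit(iter);

        // fit() consumes the iterator, so rewind it before use
        iter.reset();

        // From here on, every DataSet returned by next() is standardized automatically
        iter.setPreProcessor(normalizer);
    }
}
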
Example 1
Source File: NormalizerTests.java    From nd4j with Apache License 2.0
public float testItervsDataset(DataNormalization preProcessor) {
    DataSet dataCopy = data.copy();
    DataSetIterator dataIter = new TestDataSetIterator(dataCopy, batchSize);
    preProcessor.fit(dataCopy);
    preProcessor.transform(dataCopy);
    INDArray transformA = dataCopy.getFeatures();

    preProcessor.fit(dataIter);
    dataIter.setPreProcessor(preProcessor);
    DataSet next = dataIter.next();
    INDArray transformB = next.getFeatures();

    while (dataIter.hasNext()) {
        next = dataIter.next();
        INDArray transformb = next.getFeatures();
        transformB = Nd4j.vstack(transformB, transformb);
    }

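    // Return the maximum relative difference between the whole-dataset transform (A) and the batch-wise iterator transform (B)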
    return Transforms.abs(transformB.div(transformA).rsub(1)).maxNumber().floatValue();
}
 
Example 2
Source File: DataSetIteratorHelper.java    From Java-Deep-Learning-Cookbook with MIT License
private static DataSetIteratorSplitter createDataSetSplitter() throws IOException, InterruptedException {
    final RecordReader recordReader = DataSetIteratorHelper.generateReader(new ClassPathResource("Churn_Modelling.csv").getFile());
    final DataSetIterator dataSetIterator = new RecordReaderDataSetIterator.Builder(recordReader,batchSize)
            .classification(labelIndex,numClasses)
            .build();
    final DataNormalization dataNormalization = new NormalizerStandardize();
    dataNormalization.fit(dataSetIterator);
    dataSetIterator.setPreProcessor(dataNormalization);
    final DataSetIteratorSplitter dataSetIteratorSplitter = new DataSetIteratorSplitter(dataSetIterator,1250,0.8);
    return dataSetIteratorSplitter;
}
 
Example 3
Source File: TestScoreFunctions.java    From deeplearning4j with Apache License 2.0
@Override
public Object testData(Map<String, Object> dataParameters) {
    try {
        DataSetIterator iter = new MnistDataSetIterator(4, 16, false, false, false, 12345);
        iter.setPreProcessor(new PreProc(rocType));
        return iter;
    } catch (IOException e){
        throw new RuntimeException(e);
    }
}
 
Example 4
Source File: DataStorage.java    From Java-Machine-Learning-for-Computer-Vision with MIT License
default DataSetIterator getDataSetIterator(InputSplit sample) throws IOException {
    ImageRecordReader imageRecordReader = new ImageRecordReader(HEIGHT, WIDTH, CHANNELS, LABEL_GENERATOR_MAKER);
    imageRecordReader.initialize(sample);

    DataSetIterator iterator = new RecordReaderDataSetIterator(imageRecordReader, BATCH_SIZE, 1, NUM_POSSIBLE_LABELS);
    iterator.setPreProcessor(new VGG16ImagePreProcessor());
    return iterator;
}
 
Example 5
Source File: OCNNOutputLayerTest.java    From deeplearning4j with Apache License 2.0
public DataSetIterator getNormalizedIterator() {
    DataSetIterator dataSetIterator = new IrisDataSetIterator(150,150);
    NormalizerStandardize normalizerStandardize = new NormalizerStandardize();
    normalizerStandardize.fit(dataSetIterator);
    dataSetIterator.reset();
    dataSetIterator.setPreProcessor(normalizerStandardize);
    return dataSetIterator;
}
 
Example 6
Source File: HyperParameterTuning.java    From Java-Deep-Learning-Cookbook with MIT License
public DataSetIteratorSplitter dataSplit(DataSetIterator iterator) throws IOException, InterruptedException {
    DataNormalization dataNormalization = new NormalizerStandardize();
    dataNormalization.fit(iterator);
    iterator.setPreProcessor(dataNormalization);
    DataSetIteratorSplitter splitter = new DataSetIteratorSplitter(iterator,1000,0.8);
    return splitter;
}
 
Example 7
Source File: HyperParameterTuningArbiterUiExample.java    From Java-Deep-Learning-Cookbook with MIT License
public DataSetIteratorSplitter dataSplit(DataSetIterator iterator) throws IOException, InterruptedException {
    DataNormalization dataNormalization = new NormalizerStandardize();
    dataNormalization.fit(iterator);
    iterator.setPreProcessor(dataNormalization);
    DataSetIteratorSplitter splitter = new DataSetIteratorSplitter(iterator,1000,0.8);
    return splitter;
}
 
Example 8
Source File: ImageClassifierAPI.java    From Java-Deep-Learning-Cookbook with MIT License
public static INDArray generateOutput(File inputFile, String modelFileLocation) throws IOException, InterruptedException {
    //retrieve the saved model
    final File modelFile = new File(modelFileLocation);
    final MultiLayerNetwork model = ModelSerializer.restoreMultiLayerNetwork(modelFile);
    final RecordReader imageRecordReader = generateReader(inputFile);
    final ImagePreProcessingScaler normalizerStandardize = ModelSerializer.restoreNormalizerFromFile(modelFile);
    final DataSetIterator dataSetIterator = new RecordReaderDataSetIterator.Builder(imageRecordReader,1).build();
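    // Note: ImagePreProcessingScaler is a fixed-range pixel scaler; the restored normalizer is already fully configured, so this fit() call does not compute statistics from the data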
    normalizerStandardize.fit(dataSetIterator);
    dataSetIterator.setPreProcessor(normalizerStandardize);
    return model.output(dataSetIterator);
}
 
Example 9
Source File: BNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testGradient2dFixedGammaBeta() {
    DataNormalization scaler = new NormalizerMinMaxScaler();
    DataSetIterator iter = new IrisDataSetIterator(150, 150);
    scaler.fit(iter);
    iter.setPreProcessor(scaler);
    DataSet ds = iter.next();
    INDArray input = ds.getFeatures();
    INDArray labels = ds.getLabels();

    for (boolean useLogStd : new boolean[]{true, false}) {
        MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp())
                .dataType(DataType.DOUBLE)
                .seed(12345L)
                .dist(new NormalDistribution(0, 1)).list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.IDENTITY).build())
                .layer(1, new BatchNormalization.Builder().useLogStd(useLogStd).lockGammaBeta(true).gamma(2.0).beta(0.5).nOut(3)
                        .build())
                .layer(2, new ActivationLayer.Builder().activation(Activation.TANH).build())
                .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(3).nOut(3).build());

        MultiLayerNetwork mln = new MultiLayerNetwork(builder.build());
        mln.init();

        //Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc
        //i.e., runningMean = decay * runningMean + (1-decay) * batchMean
        //However, numerical gradient will be 0 as forward pass doesn't depend on this "parameter"
        Set<String> excludeParams = new HashSet<>(Arrays.asList("1_mean", "1_var", "1_log10stdev"));
        boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(mln).input(input)
                .labels(labels).excludeParams(excludeParams));

        assertTrue(gradOK);
        TestUtils.testModelSerialization(mln);
    }
}
 
Example 10
Source File: GradientCheckTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testGradientWeightDecay() {

    Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.THRESHOLDEDRELU};
    boolean[] characteristic = {false, true}; //If true: run some backprop steps first

    LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
    Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here

    DataNormalization scaler = new NormalizerMinMaxScaler();
    DataSetIterator iter = new IrisDataSetIterator(150, 150);
    scaler.fit(iter);
    iter.setPreProcessor(scaler);
    DataSet ds = iter.next();

    INDArray input = ds.getFeatures();
    INDArray labels = ds.getLabels();

    //use l2vals[i] with l1vals[i]
    double[] l2vals = {0.4, 0.0, 0.4, 0.4, 0.0, 0.0};
    double[] l1vals = {0.0, 0.0, 0.5, 0.0, 0.5, 0.0};
    double[] biasL2 = {0.0, 0.0, 0.0, 0.2, 0.0, 0.0};
    double[] biasL1 = {0.0, 0.0, 0.6, 0.0, 0.0, 0.5};
    double[] wdVals = {0.0, 0.0, 0.0, 0.0, 0.4, 0.0};
    double[] wdBias = {0.0, 0.0, 0.0, 0.0, 0.0, 0.4};

    for (Activation afn : activFns) {
        for (int i = 0; i < lossFunctions.length; i++) {
            for (int k = 0; k < l2vals.length; k++) {
                LossFunction lf = lossFunctions[i];
                Activation outputActivation = outputActivations[i];
                double l2 = l2vals[k];
                double l1 = l1vals[k];

                MultiLayerConfiguration conf =
                        new NeuralNetConfiguration.Builder().l2(l2).l1(l1)
                                .dataType(DataType.DOUBLE)
                                .l2Bias(biasL2[k]).l1Bias(biasL1[k])
                                .weightDecay(wdVals[k]).weightDecayBias(wdBias[k])
                                .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                                .seed(12345L)
                                .list().layer(0,
                                new DenseLayer.Builder().nIn(4).nOut(3)
                                        .dist(new NormalDistribution(0,
                                                1))
                                        .updater(new NoOp())
                                        .activation(afn).build())
                                .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3)
                                        .dist(new NormalDistribution(0, 1))
                                        .updater(new NoOp())
                                        .activation(outputActivation).build())
                                .build();

                MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                mln.init();

                boolean gradOK1 = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                String msg = "testGradientWeightDecay() - activationFn=" + afn + ", lossFn=" + lf
                        + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1;
                assertTrue(msg, gradOK1);

                TestUtils.testModelSerialization(mln);
            }
        }
    }
}
 
Example 11
Source File: CustomerRetentionPredictionExample.java    From Java-Deep-Learning-Cookbook with MIT License
public static void main(String[] args) throws IOException, InterruptedException {

    final int labelIndex = 11;
    final int batchSize = 8;
    final int numClasses = 2;
    final INDArray weightsArray = Nd4j.create(new double[]{0.57, 0.75});

    final RecordReader recordReader = generateReader(new ClassPathResource("Churn_Modelling.csv").getFile());
    final DataSetIterator dataSetIterator = new RecordReaderDataSetIterator.Builder(recordReader, batchSize)
            .classification(labelIndex, numClasses)
            .build();
    final DataNormalization dataNormalization = new NormalizerStandardize();
    dataNormalization.fit(dataSetIterator);
    dataSetIterator.setPreProcessor(dataNormalization);
    final DataSetIteratorSplitter dataSetIteratorSplitter = new DataSetIteratorSplitter(dataSetIterator, 1250, 0.8);

    log.info("Building Model------------------->>>>>>>>>");

    final MultiLayerConfiguration configuration = new NeuralNetConfiguration.Builder()
            .weightInit(WeightInit.RELU_UNIFORM)
            .updater(new Adam(0.015D))
            .list()
            .layer(new DenseLayer.Builder().nIn(11).nOut(6).activation(Activation.RELU).dropOut(0.9).build())
            .layer(new DenseLayer.Builder().nIn(6).nOut(6).activation(Activation.RELU).dropOut(0.9).build())
            .layer(new DenseLayer.Builder().nIn(6).nOut(4).activation(Activation.RELU).dropOut(0.9).build())
            .layer(new OutputLayer.Builder(new LossMCXENT(weightsArray)).nIn(4).nOut(2).activation(Activation.SOFTMAX).build())
            .build();

    final UIServer uiServer = UIServer.getInstance();
    final StatsStorage statsStorage = new InMemoryStatsStorage();

    final MultiLayerNetwork multiLayerNetwork = new MultiLayerNetwork(configuration);
    multiLayerNetwork.init();
    multiLayerNetwork.setListeners(new ScoreIterationListener(100),
            new StatsListener(statsStorage));
    uiServer.attach(statsStorage);
    multiLayerNetwork.fit(dataSetIteratorSplitter.getTrainIterator(), 100);

    final Evaluation evaluation = multiLayerNetwork.evaluate(dataSetIteratorSplitter.getTestIterator(), Arrays.asList("0", "1"));
    System.out.println(evaluation.stats());

    final File file = new File("model.zip");
    ModelSerializer.writeModel(multiLayerNetwork, file, true);
    ModelSerializer.addNormalizerToModel(file, dataNormalization);
}
 
Example 12
Source File: NormalizerStandardizeTest.java    From nd4j with Apache License 2.0
@Test
public void testTransform() {
    /*Random dataset is generated such that
        AX + B where X is from a normal distribution with mean 0 and std 1
        The mean of above will be B and std A
        Obtained mean and std dev are compared to theoretical
        Transformed values should be the same as X with the same seed.
     */
    long randSeed = 41732786;

    int nFeatures = 2;
    int nSamples = 6400;
    int bsize = 8;
    int a = 5;
    int b = 100;
    INDArray sampleMean, sampleStd, sampleMeanDelta, sampleStdDelta, delta, deltaPerc;
    double maxDeltaPerc, sampleMeanSEM;

    genRandomDataSet normData = new genRandomDataSet(nSamples, nFeatures, a, b, randSeed);
    DataSet genRandExpected = normData.theoreticalTransform;
    genRandomDataSet expectedData = new genRandomDataSet(nSamples, nFeatures, 1, 0, randSeed);
    genRandomDataSet beforeTransformData = new genRandomDataSet(nSamples, nFeatures, a, b, randSeed);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    DataSetIterator normIterator = normData.getIter(bsize);
    DataSetIterator genRandExpectedIter = new TestDataSetIterator(genRandExpected, bsize);
    DataSetIterator expectedIterator = expectedData.getIter(bsize);
    DataSetIterator beforeTransformIterator = beforeTransformData.getIter(bsize);

    myNormalizer.fit(normIterator);

    double tolerancePerc = 0.10; //within 0.1%
    sampleMean = myNormalizer.getMean();
    sampleMeanDelta = Transforms.abs(sampleMean.sub(normData.theoreticalMean));
    assertTrue(sampleMeanDelta.mul(100).div(normData.theoreticalMean).max(1).getDouble(0, 0) < tolerancePerc);
    //sanity check to see if it's within the theoretical standard error of mean
    sampleMeanSEM = sampleMeanDelta.div(normData.theoreticalSEM).max(1).getDouble(0, 0);
    assertTrue(sampleMeanSEM < 2.6); //99% of the time it should be within this many SEMs

    tolerancePerc = 1; //within 1% - std dev value
    sampleStd = myNormalizer.getStd();
    sampleStdDelta = Transforms.abs(sampleStd.sub(normData.theoreticalStd));

    assertTrue(sampleStdDelta.div(normData.theoreticalStd).max(1).mul(100).getDouble(0, 0) < tolerancePerc);

    tolerancePerc = 1; //within 1%
    normIterator.setPreProcessor(myNormalizer);
    while (normIterator.hasNext()) {
        INDArray before = beforeTransformIterator.next().getFeatures();
        INDArray origBefore = genRandExpectedIter.next().getFeatures();
        INDArray after = normIterator.next().getFeatures();
        INDArray expected = expectedIterator.next().getFeatures();
        delta = Transforms.abs(after.sub(expected));
        deltaPerc = delta.div(Transforms.abs(before.sub(expected)));
        deltaPerc.muli(100);
        maxDeltaPerc = deltaPerc.max(0, 1).getDouble(0, 0);
        assertTrue(maxDeltaPerc < tolerancePerc);
    }
}
 
Example 13
Source File: NormalizeUciData.java    From SKIL_Examples with Apache License 2.0
public void run() throws Exception {
    File trainingOutputFile = new File(trainOutputPath);
    File testOutputFile = new File(testOutputPath);

    if (trainingOutputFile.exists() || testOutputFile.exists()) {
        System.out.println(String.format("Warning: overwriting output files (%s, %s)", trainOutputPath, testOutputPath));

        trainingOutputFile.delete();
        testOutputFile.delete();
    }

    System.out.format("downloading from %s\n", downloadUrl);
    System.out.format("writing training output to %s\n", trainOutputPath);
    System.out.format("writing testing output to %s\n", testOutputPath);

    URL url = new URL(downloadUrl);
    String data = IOUtils.toString(url);
    String[] lines = data.split("\n");
    List<INDArray> arrays = new LinkedList<INDArray>();
    List<Integer> labels = new LinkedList<Integer>();

    for (int i=0; i<lines.length; i++) {
        String line = lines[i];
        String[] cols = line.split("\\s+");

        int label = i / 100;
        INDArray array = Nd4j.zeros(1, 60);

        for (int j=0; j<cols.length; j++) {
            Double d = Double.parseDouble(cols[j]);
            array.putScalar(0, j, d);
        }

        arrays.add(array);
        labels.add(label);
    }

    // Shuffle with **known** seed
    Collections.shuffle(arrays, new Random(12345));
    Collections.shuffle(labels, new Random(12345));

    INDArray trainData = Nd4j.zeros(450, 60);
    INDArray testData = Nd4j.zeros(150, 60);

    for (int i=0; i<arrays.size(); i++) {
        INDArray arr = arrays.get(i);

        if (i < 450) { // Training
            trainData.putRow(i, arr);
        } else { // Test
            testData.putRow(i-450, arr);
        }
    }

    DataSet trainDs = new DataSet(trainData, trainData);
    DataSetIterator trainIt = new ListDataSetIterator(trainDs.asList());

    DataSet testDs = new DataSet(testData, testData);
    DataSetIterator testIt = new ListDataSetIterator(testDs.asList());

    // Fit normalizer on training data only!
    DataNormalization normalizer = dataNormalizer.getNormalizer();
    normalizer.fit(trainIt);

    // Print out basic summary stats
    switch (normalizer.getType()) {
        case STANDARDIZE:
            System.out.format("Normalizer - Standardize:\n  mean=%s\n  std= %s\n",
                    ((NormalizerStandardize)normalizer).getMean(),
                    ((NormalizerStandardize)normalizer).getStd());
    }

    // Use same normalizer for both
    trainIt.setPreProcessor(normalizer);
    testIt.setPreProcessor(normalizer);

    String trainOutput = toCsv(trainIt, labels.subList(0, 450), new int[]{1, 60});
    String testOutput = toCsv(testIt, labels.subList(450, 600), new int[]{1, 60});

    FileUtils.write(trainingOutputFile, trainOutput);
    System.out.format("wrote normalized training file to %s\n", trainingOutputFile);

    FileUtils.write(testOutputFile, testOutput);
    System.out.format("wrote normalized test file to %s\n", testOutputFile);

}
 
Example 14
Source File: NormalizerMinMaxScalerTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testBruteForce() {
    //X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
    //X_scaled = X_std * (max - min) + min
    // Dataset features are scaled consecutive natural numbers
    int nSamples = 500;
    int x = 4, y = 2, z = 3;

    INDArray featureX = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1);
    INDArray featureY = featureX.mul(y);
    INDArray featureZ = featureX.mul(z);
    featureX.muli(x);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    //expected min and max
    INDArray theoreticalMin = Nd4j.create(new double[] {x, y, z}, new long[]{1,3});
    INDArray theoreticalMax = Nd4j.create(new double[] {nSamples * x, nSamples * y, nSamples * z}, new long[]{1,3});
    INDArray theoreticalRange = theoreticalMax.sub(theoreticalMin);

    NormalizerMinMaxScaler myNormalizer = new NormalizerMinMaxScaler();
    myNormalizer.fit(sampleDataSet);

    INDArray minDataSet = myNormalizer.getMin();
    INDArray maxDataSet = myNormalizer.getMax();
    INDArray minDiff = minDataSet.sub(theoreticalMin).max();
    INDArray maxDiff = maxDataSet.sub(theoreticalMax).max();
    assertEquals(minDiff.getDouble(0), 0.0, 0.000000001);
    assertEquals(maxDiff.max().getDouble(0), 0.0, 0.000000001);

    // SAME TEST WITH THE ITERATOR
    int bSize = 1;
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    myNormalizer.fit(sampleIter);
    minDataSet = myNormalizer.getMin();
    maxDataSet = myNormalizer.getMax();
    assertEquals(minDataSet.sub(theoreticalMin).max(1).getDouble(0), 0.0, 0.000000001);
    assertEquals(maxDataSet.sub(theoreticalMax).max(1).getDouble(0), 0.0, 0.000000001);

    sampleIter.setPreProcessor(myNormalizer);
    INDArray actual, expected, delta;
    int i = 1;
    while (sampleIter.hasNext()) {
        expected = theoreticalMin.mul(i - 1).div(theoreticalRange);
        actual = sampleIter.next().getFeatures();
        delta = Transforms.abs(actual.sub(expected));
        assertTrue(delta.max(1).getDouble(0) < 0.0001);
        i++;
    }

}
 
Example 15
Source File: BNGradientCheckTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testGradient2dSimple() {
    DataNormalization scaler = new NormalizerMinMaxScaler();
    DataSetIterator iter = new IrisDataSetIterator(150, 150);
    scaler.fit(iter);
    iter.setPreProcessor(scaler);
    DataSet ds = iter.next();
    INDArray input = ds.getFeatures();
    INDArray labels = ds.getLabels();

    for (boolean useLogStd : new boolean[]{true, false}) {

        MultiLayerConfiguration.Builder builder =
                new NeuralNetConfiguration.Builder().updater(new NoOp())
                        .dataType(DataType.DOUBLE)
                        .seed(12345L)
                        .dist(new NormalDistribution(0, 1)).list()
                        .layer(0, new DenseLayer.Builder().nIn(4).nOut(3)
                                .activation(Activation.IDENTITY).build())
                        .layer(1, new BatchNormalization.Builder().useLogStd(useLogStd).nOut(3).build())
                        .layer(2, new ActivationLayer.Builder().activation(Activation.TANH).build())
                        .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                .activation(Activation.SOFTMAX).nIn(3).nOut(3).build());

        MultiLayerNetwork mln = new MultiLayerNetwork(builder.build());
        mln.init();

        //Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc
        //i.e., runningMean = decay * runningMean + (1-decay) * batchMean
        //However, numerical gradient will be 0 as forward pass doesn't depend on this "parameter"
        Set<String> excludeParams = new HashSet<>(Arrays.asList("1_mean", "1_var", "1_log10stdev"));
        boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(mln).input(input)
                .labels(labels).excludeParams(excludeParams));

        assertTrue(gradOK);
        TestUtils.testModelSerialization(mln);
    }
}
 
Example 16
Source File: SameDiffTrainingTest.java    From deeplearning4j with Apache License 2.0
@Test
public void irisTrainingValidationTest() {

    DataSetIterator iter = new IrisDataSetIterator(150, 150);
    NormalizerStandardize std = new NormalizerStandardize();
    std.fit(iter);
    iter.setPreProcessor(std);

    DataSetIterator valIter = new IrisDataSetIterator(30, 60);
    // Normalize the validation data with the statistics fitted on the training data
    valIter.setPreProcessor(std);

    Nd4j.getRandom().setSeed(12345);
    SameDiff sd = SameDiff.create();

    SDVariable in = sd.placeHolder("input", DataType.FLOAT, -1, 4);
    SDVariable label = sd.placeHolder("label", DataType.FLOAT, -1, 3);

    SDVariable w0 = sd.var("w0", new XavierInitScheme('c', 4, 10), DataType.FLOAT, 4, 10);
    SDVariable b0 = sd.zero("b0", DataType.FLOAT, 1, 10);

    SDVariable w1 = sd.var("w1", new XavierInitScheme('c', 10, 3), DataType.FLOAT, 10, 3);
    SDVariable b1 = sd.zero("b1", DataType.FLOAT, 1, 3);

    SDVariable z0 = in.mmul(w0).add(b0);
    SDVariable a0 = sd.math().tanh(z0);
    SDVariable z1 = a0.mmul(w1).add("prediction", b1);
    SDVariable a1 = sd.nn().softmax(z1);

    SDVariable diff = sd.math().squaredDifference(a1, label);
    SDVariable lossMse = diff.mean();    // mean of the squared differences = MSE

    TrainingConfig conf = new TrainingConfig.Builder()
            .l2(1e-4)
            .updater(new Adam(1e-2))
            .dataSetFeatureMapping("input")
            .dataSetLabelMapping("label")
            .validationEvaluation("prediction", 0, new Evaluation())
            .build();

    sd.setTrainingConfig(conf);

    History hist = sd.fit().train(iter, 50).validate(valIter, 5).exec();

    Evaluation e = hist.finalValidationEvaluations().evaluation("prediction");

    System.out.println(e.stats());

    double acc = e.accuracy();

    assertTrue("Accuracy bad: " + acc, acc >= 0.75);
}
 
Example 17
Source File: NormalizerStandardizeTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testTransform() {
    /*Random dataset is generated such that
        AX + B where X is from a normal distribution with mean 0 and std 1
        The mean of above will be B and std A
        Obtained mean and std dev are compared to theoretical
        Transformed values should be the same as X with the same seed.
     */
    long randSeed = 12345;

    int nFeatures = 2;
    int nSamples = 6400;
    int bsize = 8;
    int a = 5;
    int b = 100;
    INDArray sampleMean, sampleStd, sampleMeanDelta, sampleStdDelta, delta, deltaPerc;
    double maxDeltaPerc, sampleMeanSEM;

    genRandomDataSet normData = new genRandomDataSet(nSamples, nFeatures, a, b, randSeed);
    DataSet genRandExpected = normData.theoreticalTransform;
    genRandomDataSet expectedData = new genRandomDataSet(nSamples, nFeatures, 1, 0, randSeed);
    genRandomDataSet beforeTransformData = new genRandomDataSet(nSamples, nFeatures, a, b, randSeed);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    DataSetIterator normIterator = normData.getIter(bsize);
    DataSetIterator genRandExpectedIter = new TestDataSetIterator(genRandExpected, bsize);
    DataSetIterator expectedIterator = expectedData.getIter(bsize);
    DataSetIterator beforeTransformIterator = beforeTransformData.getIter(bsize);

    myNormalizer.fit(normIterator);

    double tolerancePerc = 0.10; //within 0.1%
    sampleMean = myNormalizer.getMean();
    sampleMeanDelta = Transforms.abs(sampleMean.sub(normData.theoreticalMean));
    assertTrue(sampleMeanDelta.mul(100).div(normData.theoreticalMean).max().getDouble(0) < tolerancePerc);
    //sanity check to see if it's within the theoretical standard error of mean
    sampleMeanSEM = sampleMeanDelta.div(normData.theoreticalSEM).max().getDouble(0);
    assertTrue(sampleMeanSEM < 2.6); //99% of the time it should be within this many SEMs

    tolerancePerc = 1; //within 1% - std dev value
    sampleStd = myNormalizer.getStd();
    sampleStdDelta = Transforms.abs(sampleStd.sub(normData.theoreticalStd));

    double actualmaxDiff = sampleStdDelta.div(normData.theoreticalStd).max().mul(100).getDouble(0);
    assertTrue(actualmaxDiff < tolerancePerc);

    tolerancePerc = 1; //within 1%
    normIterator.setPreProcessor(myNormalizer);
    while (normIterator.hasNext()) {
        INDArray before = beforeTransformIterator.next().getFeatures();
        INDArray origBefore = genRandExpectedIter.next().getFeatures();
        INDArray after = normIterator.next().getFeatures();
        INDArray expected = expectedIterator.next().getFeatures();
        delta = Transforms.abs(after.sub(expected));
        deltaPerc = delta.div(Transforms.abs(before.sub(expected)));
        deltaPerc.muli(100);
        maxDeltaPerc = deltaPerc.max(0, 1).getDouble(0);
        assertTrue(maxDeltaPerc < tolerancePerc);
    }
}
 
Example 18
Source File: NormalizerMinMaxScalerTest.java    From nd4j with Apache License 2.0
@Test
public void testBruteForce() {
    //X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
    //X_scaled = X_std * (max - min) + min
    // Dataset features are scaled consecutive natural numbers
    int nSamples = 500;
    int x = 4, y = 2, z = 3;

    INDArray featureX = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1);
    INDArray featureY = featureX.mul(y);
    INDArray featureZ = featureX.mul(z);
    featureX.muli(x);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    //expected min and max
    INDArray theoreticalMin = Nd4j.create(new double[] {x, y, z});
    INDArray theoreticalMax = Nd4j.create(new double[] {nSamples * x, nSamples * y, nSamples * z});
    INDArray theoreticalRange = theoreticalMax.sub(theoreticalMin);

    NormalizerMinMaxScaler myNormalizer = new NormalizerMinMaxScaler();
    myNormalizer.fit(sampleDataSet);

    INDArray minDataSet = myNormalizer.getMin();
    INDArray maxDataSet = myNormalizer.getMax();
    INDArray minDiff = minDataSet.sub(theoreticalMin).max(1);
    INDArray maxDiff = maxDataSet.sub(theoreticalMax).max(1);
    assertEquals(minDiff.getDouble(0, 0), 0.0, 0.000000001);
    assertEquals(maxDiff.max(1).getDouble(0, 0), 0.0, 0.000000001);

    // SAME TEST WITH THE ITERATOR
    int bSize = 1;
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    myNormalizer.fit(sampleIter);
    minDataSet = myNormalizer.getMin();
    maxDataSet = myNormalizer.getMax();
    assertEquals(minDataSet.sub(theoreticalMin).max(1).getDouble(0, 0), 0.0, 0.000000001);
    assertEquals(maxDataSet.sub(theoreticalMax).max(1).getDouble(0, 0), 0.0, 0.000000001);

    sampleIter.setPreProcessor(myNormalizer);
    INDArray actual, expected, delta;
    int i = 1;
    while (sampleIter.hasNext()) {
        expected = theoreticalMin.mul(i - 1).div(theoreticalRange);
        actual = sampleIter.next().getFeatures();
        delta = Transforms.abs(actual.sub(expected));
        assertTrue(delta.max(1).getDouble(0, 0) < 0.0001);
        i++;
    }

}