Java Code Examples for org.nd4j.linalg.dataset.DataSet#copy()

The following examples show how to use org.nd4j.linalg.dataset.DataSet#copy(). Each example is taken from an open-source project; the source file and license are noted above it.
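
Before the project examples, here is a minimal sketch of what DataSet#copy() is expected to do: duplicate the feature and label arrays so the copy can be mutated (for example by a pre-processor or normalizer) without touching the original. The toy values below are made up for illustration; only the DataSet constructor, Nd4j.create and copy() are real API calls.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;

public class DataSetCopySketch {
    public static void main(String[] args) {
        // toy dataset: 2 examples, 2 features, 2 one-hot labels (arbitrary values)
        INDArray features = Nd4j.create(new float[][] {{1f, 2f}, {3f, 4f}});
        INDArray labels = Nd4j.create(new float[][] {{1f, 0f}, {0f, 1f}});
        DataSet original = new DataSet(features, labels);

        // copy() duplicates the underlying arrays, so in-place changes to the copy
        // do not propagate back to the original
        DataSet copy = original.copy();
        copy.getFeatures().addi(100);

        System.out.println(original.getFeatures()); // still 1..4
        System.out.println(copy.getFeatures());     // shifted by 100
    }
}
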
Example 1
Source File: UnderSamplingPreProcessorTest.java    From nd4j with Apache License 2.0
@Test
public void allMajority() {
    float[] someTargets = new float[] {0.01f, 0.1f, 0.5f};
    DataSet d = allMajorityDataSet(false);
    DataSet dToPreProcess;
    for (int i = 0; i < someTargets.length; i++) {
        //if all majority, the default is to mask all time steps
        UnderSamplingByMaskingPreProcessor preProcessor =
                        new UnderSamplingByMaskingPreProcessor(someTargets[i], shortSeq / 2);
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        assertEquals(Nd4j.zeros(dToPreProcess.getLabelsMaskArray().shape()), dToPreProcess.getLabelsMaskArray());

        //change default and check distribution which should be 1-targetMinorityDist
        preProcessor.donotMaskAllMajorityWindows();
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        INDArray percentagesNow = dToPreProcess.getLabelsMaskArray().sum(1).div(shortSeq);
        assertTrue(Nd4j.valueArrayOf(percentagesNow.shape(), 1 - someTargets[i]).equalsWithEps(percentagesNow,
                        tolerancePerc));
    }
}
 
Example 2
Source File: UnderSamplingPreProcessorTest.java    From nd4j with Apache License 2.0
@Test
public void allMinority() {
    float[] someTargets = new float[] {0.01f, 0.1f, 0.5f};
    DataSet d = allMinorityDataSet(false);
    DataSet dToPreProcess;
    for (int i = 0; i < someTargets.length; i++) {
        UnderSamplingByMaskingPreProcessor preProcessor =
                        new UnderSamplingByMaskingPreProcessor(someTargets[i], shortSeq / 2);
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        //all minority classes present  - check that no time steps are masked
        assertEquals(Nd4j.ones(minibatchSize, shortSeq), dToPreProcess.getLabelsMaskArray());

        //check behavior with override minority - now these are seen as all majority classes
        preProcessor.overrideMinorityDefault();
        preProcessor.donotMaskAllMajorityWindows();
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        INDArray percentagesNow = dToPreProcess.getLabelsMaskArray().sum(1).div(shortSeq);
        assertTrue(Nd4j.valueArrayOf(percentagesNow.shape(), 1 - someTargets[i]).equalsWithEps(percentagesNow,
                        tolerancePerc));
    }
}
 
Example 3
Source File: KFoldIterator.java    From nd4j with Apache License 2.0
/**
 * Create an iterator given the dataset and a value of k (optional, defaults to 10).
 * If the number of samples in the dataset is not a multiple of k, the last fold will have fewer samples, with the rest having the same number of samples.
 *
 * @param k          number of folds (optional, defaults to 10)
 * @param singleFold DataSet to split into k folds
 */
public KFoldIterator(int k, DataSet singleFold) {
    this.k = k;
    this.singleFold = singleFold.copy();
    if (k <= 1)
        throw new IllegalArgumentException();
    if (singleFold.numExamples() % k != 0) {
        if (k != 2) {
            this.batch = singleFold.numExamples() / (k - 1);
            this.lastBatch = singleFold.numExamples() % (k - 1);
        } else {
            this.lastBatch = singleFold.numExamples() / 2;
            this.batch = this.lastBatch + 1;
        }
    } else {
        this.batch = singleFold.numExamples() / k;
        this.lastBatch = singleFold.numExamples() / k;
    }
}
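
For context, the constructor above is typically driven like the sketch below: next() returns the training folds and, in the ND4J KFoldIterator API, testFold() is assumed to return the held-out fold. The toy data is random filler.

import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;

// toy dataset: 100 examples, 4 features, 3 one-hot labels (random placeholder values)
DataSet allData = new DataSet(Nd4j.rand(100, 4), Nd4j.rand(100, 3));

KFoldIterator kFold = new KFoldIterator(5, allData); // 5 folds
while (kFold.hasNext()) {
    DataSet train = kFold.next();     // the k-1 folds used for training
    DataSet test = kFold.testFold();  // the remaining fold, held out for evaluation
    System.out.println("train=" + train.numExamples() + " test=" + test.numExamples());
}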
 
Example 4
Source File: UnderSamplingPreProcessorTest.java    From deeplearning4j with Apache License 2.0
@Test
public void allMajority() {
    float[] someTargets = new float[] {0.01f, 0.1f, 0.5f};
    DataSet d = allMajorityDataSet(false);
    DataSet dToPreProcess;
    for (int i = 0; i < someTargets.length; i++) {
        //if all majority, the default is to mask all time steps
        UnderSamplingByMaskingPreProcessor preProcessor =
                        new UnderSamplingByMaskingPreProcessor(someTargets[i], shortSeq / 2);
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        INDArray exp = Nd4j.zeros(dToPreProcess.getLabelsMaskArray().shape());
        INDArray lm = dToPreProcess.getLabelsMaskArray();
        assertEquals(exp, lm);

        //change default and check distribution which should be 1-targetMinorityDist
        preProcessor.donotMaskAllMajorityWindows();
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        INDArray percentagesNow = dToPreProcess.getLabelsMaskArray().sum(1).div(shortSeq);
        assertTrue(Nd4j.valueArrayOf(percentagesNow.shape(), 1 - someTargets[i]).castTo(Nd4j.defaultFloatingPointType()).equalsWithEps(percentagesNow,
                        tolerancePerc));
    }
}
 
Example 5
Source File: UnderSamplingPreProcessorTest.java    From deeplearning4j with Apache License 2.0
@Test
public void allMinority() {
    float[] someTargets = new float[] {0.01f, 0.1f, 0.5f};
    DataSet d = allMinorityDataSet(false);
    DataSet dToPreProcess;
    for (int i = 0; i < someTargets.length; i++) {
        UnderSamplingByMaskingPreProcessor preProcessor =
                        new UnderSamplingByMaskingPreProcessor(someTargets[i], shortSeq / 2);
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        //all minority classes present  - check that no time steps are masked
        assertEquals(Nd4j.ones(minibatchSize, shortSeq), dToPreProcess.getLabelsMaskArray());

        //check behavior with override minority - now these are seen as all majority classes
        preProcessor.overrideMinorityDefault();
        preProcessor.donotMaskAllMajorityWindows();
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        INDArray percentagesNow = dToPreProcess.getLabelsMaskArray().sum(1).div(shortSeq);
        assertTrue(Nd4j.valueArrayOf(percentagesNow.shape(), 1 - someTargets[i])
                .castTo(Nd4j.defaultFloatingPointType()).equalsWithEps(percentagesNow,tolerancePerc));
    }
}
 
Example 6
Source File: LoneTest.java    From nd4j with Apache License 2.0
@Test
public void maskWhenMerge() {
    DataSet dsA = new DataSet(Nd4j.linspace(1, 15, 15).reshape(1, 3, 5), Nd4j.zeros(1, 3, 5));
    DataSet dsB = new DataSet(Nd4j.linspace(1, 9, 9).reshape(1, 3, 3), Nd4j.zeros(1, 3, 3));
    List<DataSet> dataSetList = new ArrayList<DataSet>();
    dataSetList.add(dsA);
    dataSetList.add(dsB);
    DataSet fullDataSet = DataSet.merge(dataSetList);
    assertTrue(fullDataSet.getFeaturesMaskArray() != null);

    DataSet fullDataSetCopy = fullDataSet.copy();
    assertTrue(fullDataSetCopy.getFeaturesMaskArray() != null);

}
 
Example 7
Source File: LoneTest.java    From deeplearning4j with Apache License 2.0
@Test
public void maskWhenMerge() {
    DataSet dsA = new DataSet(Nd4j.linspace(1, 15, 15).reshape(1, 3, 5), Nd4j.zeros(1, 3, 5));
    DataSet dsB = new DataSet(Nd4j.linspace(1, 9, 9).reshape(1, 3, 3), Nd4j.zeros(1, 3, 3));
    List<DataSet> dataSetList = new ArrayList<DataSet>();
    dataSetList.add(dsA);
    dataSetList.add(dsB);
    DataSet fullDataSet = DataSet.merge(dataSetList);
    assertTrue(fullDataSet.getFeaturesMaskArray() != null);

    DataSet fullDataSetCopy = fullDataSet.copy();
    assertTrue(fullDataSetCopy.getFeaturesMaskArray() != null);

}
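
The two tests above only assert that the mask survives the copy. A slightly stronger check, sketched below with the same merged data, is that the copied mask holds equal values but is a separate INDArray instance; this assumes copy() duplicates rather than shares the underlying arrays.

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotSame;

import java.util.Arrays;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;

DataSet dsA = new DataSet(Nd4j.linspace(1, 15, 15).reshape(1, 3, 5), Nd4j.zeros(1, 3, 5));
DataSet dsB = new DataSet(Nd4j.linspace(1, 9, 9).reshape(1, 3, 3), Nd4j.zeros(1, 3, 3));
DataSet merged = DataSet.merge(Arrays.asList(dsA, dsB));

DataSet copy = merged.copy();
// equal mask values after the copy...
assertEquals(merged.getFeaturesMaskArray(), copy.getFeaturesMaskArray());
// ...but, assuming copy() duplicates the arrays, not the same instance
assertNotSame(merged.getFeaturesMaskArray(), copy.getFeaturesMaskArray());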
 
Example 8
Source File: UnderSamplingPreProcessorTest.java    From nd4j with Apache License 2.0
@Test
public void mixedDist() {

    UnderSamplingByMaskingPreProcessor preProcessor = new UnderSamplingByMaskingPreProcessor(targetDist, window);

    DataSet dataSet = knownDistVariedDataSet(new float[] {0.1f, 0.2f, 0.8f}, false);

    //Call preprocess for the same dataset multiple times to mimic calls with .next() and check the total distribution
    int loop = 2;
    for (int i = 0; i < loop; i++) {
        //preprocess dataset
        DataSet dataSetToPreProcess = dataSet.copy();
        INDArray labelsBefore = dataSetToPreProcess.getLabels().dup();
        preProcessor.preProcess(dataSetToPreProcess);
        INDArray labels = dataSetToPreProcess.getLabels();
        assertEquals(labelsBefore, labels);

        //check masks are zero where there are no time steps
        INDArray masks = dataSetToPreProcess.getLabelsMaskArray();
        INDArray shouldBeAllZeros =
                        masks.get(NDArrayIndex.interval(0, 3), NDArrayIndex.interval(shortSeq, longSeq));
        assertEquals(Nd4j.zeros(shouldBeAllZeros.shape()), shouldBeAllZeros);

        //check distribution of masks in window, going backwards from last time step
        for (int j = (int) Math.ceil((double) longSeq / window); j > 0; j--) {
            //collect mask and labels
            int maxIndex = min(longSeq, j * window);
            int minIndex = min(0, maxIndex - window);
            INDArray maskWindow = masks.get(NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex));
            INDArray labelWindow = labels.get(NDArrayIndex.all(), NDArrayIndex.point(0),
                            NDArrayIndex.interval(minIndex, maxIndex));

            //calc minority class distribution
            INDArray minorityDist = labelWindow.mul(maskWindow).sum(1).div(maskWindow.sum(1));

            if (j < shortSeq / window) {
                assertEquals("Failed on window " + j + " batch 0, loop " + i, targetDist,
                                minorityDist.getFloat(0, 0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 1, loop " + i, targetDist,
                                minorityDist.getFloat(1, 0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 2, loop " + i, 0.8, minorityDist.getFloat(2, 0),
                                tolerancePerc); //should be unchanged as it was already above target dist
            }
            assertEquals("Failed on window " + j + " batch 3, loop " + i, targetDist, minorityDist.getFloat(3, 0),
                            tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 4, loop " + i, targetDist, minorityDist.getFloat(4, 0),
                            tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 5, loop " + i, 0.8, minorityDist.getFloat(5, 0),
                            tolerancePerc); //should be unchanged as it was already above target dist
        }
    }
}
 
Example 9
Source File: UnderSamplingPreProcessorTest.java    From nd4j with Apache License 2.0
@Test
public void mixedDistOneHot() {

    //preprocessor should give 30% minority class for every "window"
    UnderSamplingByMaskingPreProcessor preProcessor = new UnderSamplingByMaskingPreProcessor(targetDist, window);
    preProcessor.overrideMinorityDefault();

    //construct a dataset with known distribution of minority class and varying time steps
    DataSet dataSet = knownDistVariedDataSet(new float[] {0.9f, 0.8f, 0.2f}, true);

    //Call preprocess for the same dataset multiple times to mimic calls with .next() and check the total distribution
    int loop = 10;
    for (int i = 0; i < loop; i++) {

        //preprocess dataset
        DataSet dataSetToPreProcess = dataSet.copy();
        preProcessor.preProcess(dataSetToPreProcess);
        INDArray labels = dataSetToPreProcess.getLabels();
        INDArray masks = dataSetToPreProcess.getLabelsMaskArray();

        //check masks are zero where there were no time steps
        INDArray shouldBeAllZeros =
                        masks.get(NDArrayIndex.interval(0, 3), NDArrayIndex.interval(shortSeq, longSeq));
        assertEquals(Nd4j.zeros(shouldBeAllZeros.shape()), shouldBeAllZeros);

        //check distribution of masks in the window length, going backwards from last time step
        for (int j = (int) Math.ceil((double) longSeq / window); j > 0; j--) {
            //collect mask and labels
            int maxIndex = min(longSeq, j * window);
            int minIndex = min(0, maxIndex - window);
            INDArray maskWindow = masks.get(NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex));
            INDArray labelWindow = labels.get(NDArrayIndex.all(), NDArrayIndex.all(),
                            NDArrayIndex.interval(minIndex, maxIndex));

            //calc minority class distribution after accounting for masks
            INDArray minorityClass = labelWindow.get(NDArrayIndex.all(), NDArrayIndex.point(0), NDArrayIndex.all())
                            .mul(maskWindow);
            INDArray majorityClass = labelWindow.get(NDArrayIndex.all(), NDArrayIndex.point(1), NDArrayIndex.all())
                            .mul(maskWindow);
            INDArray minorityDist = minorityClass.sum(1).div(majorityClass.add(minorityClass).sum(1));

            if (j < shortSeq / window) {
                assertEquals("Failed on window " + j + " batch 0, loop " + i, targetDist,
                                minorityDist.getFloat(0, 0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 1, loop " + i, targetDist,
                                minorityDist.getFloat(1, 0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 2, loop " + i, 0.8, minorityDist.getFloat(2, 0),
                                tolerancePerc); //should be unchanged as it was already above target dist
            }
            assertEquals("Failed on window " + j + " batch 3, loop " + i, targetDist, minorityDist.getFloat(3, 0),
                            tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 4, loop " + i, targetDist, minorityDist.getFloat(4, 0),
                            tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 5, loop " + i, 0.8, minorityDist.getFloat(5, 0),
                            tolerancePerc); //should be unchanged as it was already above target dist
        }
    }
}
 
Example 10
Source File: UnderSamplingPreProcessorTest.java    From deeplearning4j with Apache License 2.0
@Test
public void mixedDist() {

    UnderSamplingByMaskingPreProcessor preProcessor = new UnderSamplingByMaskingPreProcessor(targetDist, window);

    DataSet dataSet = knownDistVariedDataSet(new float[] {0.1f, 0.2f, 0.8f}, false);

    //Call preprocess for the same dataset multiple times to mimic calls with .next() and check the total distribution
    int loop = 2;
    for (int i = 0; i < loop; i++) {
        //preprocess dataset
        DataSet dataSetToPreProcess = dataSet.copy();
        INDArray labelsBefore = dataSetToPreProcess.getLabels().dup();
        preProcessor.preProcess(dataSetToPreProcess);
        INDArray labels = dataSetToPreProcess.getLabels();
        assertEquals(labelsBefore, labels);

        //check masks are zero where there are no time steps
        INDArray masks = dataSetToPreProcess.getLabelsMaskArray();
        INDArray shouldBeAllZeros =
                        masks.get(NDArrayIndex.interval(0, 3), NDArrayIndex.interval(shortSeq, longSeq));
        assertEquals(Nd4j.zeros(shouldBeAllZeros.shape()), shouldBeAllZeros);

        //check distribution of masks in window, going backwards from last time step
        for (int j = (int) Math.ceil((double) longSeq / window); j > 0; j--) {
            //collect mask and labels
            int maxIndex = min(longSeq, j * window);
            int minIndex = min(0, maxIndex - window);
            INDArray maskWindow = masks.get(NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex));
            INDArray labelWindow = labels.get(NDArrayIndex.all(), NDArrayIndex.point(0),
                            NDArrayIndex.interval(minIndex, maxIndex));

            //calc minority class distribution
            INDArray minorityDist = labelWindow.mul(maskWindow).sum(1).div(maskWindow.sum(1));

            if (j < shortSeq / window) {
                assertEquals("Failed on window " + j + " batch 0, loop " + i, targetDist,
                                minorityDist.getFloat(0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 1, loop " + i, targetDist,
                                minorityDist.getFloat(1), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 2, loop " + i, 0.8, minorityDist.getFloat(2),
                                tolerancePerc); //should be unchanged as it was already above target dist
            }
            assertEquals("Failed on window " + j + " batch 3, loop " + i, targetDist, minorityDist.getFloat(3),
                            tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 4, loop " + i, targetDist, minorityDist.getFloat(4),
                            tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 5, loop " + i, 0.8, minorityDist.getFloat(5),
                            tolerancePerc); //should be unchanged as it was already above target dist
        }
    }
}
 
Example 11
Source File: UnderSamplingPreProcessorTest.java    From deeplearning4j with Apache License 2.0
@Test
public void mixedDistOneHot() {

    //preprocessor should give 30% minority class for every "window"
    UnderSamplingByMaskingPreProcessor preProcessor = new UnderSamplingByMaskingPreProcessor(targetDist, window);
    preProcessor.overrideMinorityDefault();

    //construct a dataset with known distribution of minority class and varying time steps
    DataSet dataSet = knownDistVariedDataSet(new float[] {0.9f, 0.8f, 0.2f}, true);

    //Call preprocess for the same dataset multiple times to mimic calls with .next() and check the total distribution
    int loop = 10;
    for (int i = 0; i < loop; i++) {

        //preprocess dataset
        DataSet dataSetToPreProcess = dataSet.copy();
        preProcessor.preProcess(dataSetToPreProcess);
        INDArray labels = dataSetToPreProcess.getLabels();
        INDArray masks = dataSetToPreProcess.getLabelsMaskArray();

        //check masks are zero where there were no time steps
        INDArray shouldBeAllZeros =
                        masks.get(NDArrayIndex.interval(0, 3), NDArrayIndex.interval(shortSeq, longSeq));
        assertEquals(Nd4j.zeros(shouldBeAllZeros.shape()), shouldBeAllZeros);

        //check distribution of masks in the window length, going backwards from last time step
        for (int j = (int) Math.ceil((double) longSeq / window); j > 0; j--) {
            //collect mask and labels
            int maxIndex = min(longSeq, j * window);
            int minIndex = min(0, maxIndex - window);
            INDArray maskWindow = masks.get(NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex));
            INDArray labelWindow = labels.get(NDArrayIndex.all(), NDArrayIndex.all(),
                            NDArrayIndex.interval(minIndex, maxIndex));

            //calc minority class distribution after accounting for masks
            INDArray minorityClass = labelWindow.get(NDArrayIndex.all(), NDArrayIndex.point(0), NDArrayIndex.all())
                            .mul(maskWindow);
            INDArray majorityClass = labelWindow.get(NDArrayIndex.all(), NDArrayIndex.point(1), NDArrayIndex.all())
                            .mul(maskWindow);
            INDArray minorityDist = minorityClass.sum(1).div(majorityClass.add(minorityClass).sum(1));

            if (j < shortSeq / window) {
                assertEquals("Failed on window " + j + " batch 0, loop " + i, targetDist,
                                minorityDist.getFloat(0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 1, loop " + i, targetDist,
                                minorityDist.getFloat(1), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 2, loop " + i, 0.8, minorityDist.getFloat(2),
                                tolerancePerc); //should be unchanged as it was already above target dist
            }
            assertEquals("Failed on window " + j + " batch 3, loop " + i, targetDist, minorityDist.getFloat(3),
                            tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 4, loop " + i, targetDist, minorityDist.getFloat(4),
                            tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 5, loop " + i, 0.8, minorityDist.getFloat(5),
                            tolerancePerc); //should be unchanged as it was already above target dist
        }
    }
}
 
Example 12
Source File: ModelSerializerTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testSaveRestoreNormalizerFromInputStream() throws Exception {
    DataSet dataSet = trivialDataSet();
    NormalizerStandardize norm = new NormalizerStandardize();
    norm.fit(dataSet);

    ComputationGraph cg = simpleComputationGraph();
    cg.init();

    File tempFile = tempDir.newFile();

    ModelSerializer.writeModel(cg, tempFile, true);

    ModelSerializer.addNormalizerToModel(tempFile, norm);
    FileInputStream fis = new FileInputStream(tempFile);


    NormalizerStandardize restored = ModelSerializer.restoreNormalizerFromInputStream(fis);

    assertNotEquals(null, restored);

    DataSet dataSet2 = dataSet.copy();

    norm.preProcess(dataSet2);
    assertNotEquals(dataSet.getFeatures(), dataSet2.getFeatures());

    restored.revert(dataSet2);
    assertEquals(dataSet.getFeatures(), dataSet2.getFeatures());
}
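
Stripped of the serialization details, the copy-then-normalize-then-revert pattern this test relies on looks roughly like the following; the toy data is arbitrary, while fit, preProcess and revert are the NormalizerStandardize calls already used above.

import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize;
import org.nd4j.linalg.factory.Nd4j;

DataSet dataSet = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3)); // toy data

NormalizerStandardize norm = new NormalizerStandardize();
norm.fit(dataSet);                // collect mean/std statistics from the original

DataSet workingCopy = dataSet.copy();
norm.preProcess(workingCopy);     // standardize the copy in place; the original is untouched
norm.revert(workingCopy);         // undo, so the copy's features match the original again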