Java Code Examples for org.nd4j.linalg.dataset.DataSet#merge()

The following examples show how to use org.nd4j.linalg.dataset.DataSet#merge(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
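Before the indexed examples, a minimal self-contained sketch of the basic call may help orient you. DataSet.merge() takes a List<DataSet> and combines the examples into a single DataSet along the minibatch dimension. The class name MergeSketch and the toy data below are illustrative only, not taken from any of the projects listed:

import java.util.Arrays;

import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;

public class MergeSketch {
    public static void main(String[] args) {
        // Two single-example datasets: 4 features and 3 labels each
        DataSet a = new DataSet(Nd4j.rand(1, 4), Nd4j.create(new float[] {1, 0, 0}, new int[] {1, 3}));
        DataSet b = new DataSet(Nd4j.rand(1, 4), Nd4j.create(new float[] {0, 1, 0}, new int[] {1, 3}));

        // merge() concatenates along the first (example) dimension
        DataSet merged = DataSet.merge(Arrays.asList(a, b));
        System.out.println(merged.numExamples()); // prints 2
    }
}
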
Example 1
Source File: ListDataSetIterator.java    From deeplearning4j with Apache License 2.0    6 votes
@Override
public DataSet next(int num) {
    int end = curr + num;

    List<DataSet> r = new ArrayList<>();
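    // clamp end so we never read past the backing list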
    if (end >= list.size())
        end = list.size();
    for (; curr < end; curr++) {
        r.add(list.get(curr));
    }

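    // merge the collected examples into a single minibatch DataSet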
    DataSet d = DataSet.merge(r);
    if (preProcessor != null) {
        if (!d.isPreProcessed()) {
            preProcessor.preProcess(d);
            d.markAsPreProcessed();
        }
    }
    return d;
}
 
Example 2
Source File: TestDataSetIterator.java    From nd4j with Apache License 2.0    6 votes
@Override
public DataSet next(int num) {
    int end = curr + num;

    List<DataSet> r = new ArrayList<>();
    if (end >= list.size())
        end = list.size();
    for (; curr < end; curr++) {
        r.add(list.get(curr));
    }

    DataSet d = DataSet.merge(r);
    if (preProcessor != null)
        preProcessor.preProcess(d);
    return d;
}
 
Example 3
Source File: TestDataSetIterator.java    From deeplearning4j with Apache License 2.0    6 votes
@Override
public DataSet next(int num) {
    int end = curr + num;

    List<DataSet> r = new ArrayList<>();
    if (end >= list.size())
        end = list.size();
    for (; curr < end; curr++) {
        r.add(list.get(curr));
    }

    DataSet d = DataSet.merge(r);
    if (preProcessor != null)
        preProcessor.preProcess(d);
    return d;
}
 
Example 4
Source File: SpecialTests.java    From deeplearning4j with Apache License 2.0    5 votes
@Test
public void testScalarShuffle2() {
    List<DataSet> listData = new ArrayList<>();
    for (int i = 0; i < 3; i++) {
        INDArray features = Nd4j.ones(14, 25);
        INDArray label = Nd4j.create(14, 50);
        DataSet dataset = new DataSet(features, label);
        listData.add(dataset);
    }
    DataSet data = DataSet.merge(listData);
    data.shuffle();
}
 
Example 5
Source File: BaseSparkTest.java    From deeplearning4j with Apache License 2.0    5 votes
protected JavaRDD<DataSet> getBasicSparkDataSet(int nRows, INDArray input, INDArray labels) {
    List<DataSet> list = new ArrayList<>();
    for (int i = 0; i < nRows; i++) {
        INDArray inRow = input.getRow(i, true).dup();
        INDArray outRow = labels.getRow(i, true).dup();

        DataSet ds = new DataSet(inRow, outRow);
        list.add(ds);
    }
    // 'data' is a field of the enclosing test class
    data = DataSet.merge(list);
    return sc.parallelize(list);
}
 
Example 6
Source File: WSTestDataSetIterator.java    From deeplearning4j with Apache License 2.0    5 votes
@Override
public DataSet next(int i) {
    final LinkedList<DataSet> parts = new LinkedList<>();
    while(parts.size() < i && hasNext()){
        parts.add(nextOne());
    }
    cursor++;
    return DataSet.merge(parts);
}
 
Example 7
Source File: CifarLoader.java    From DataVec with Apache License 2.0    5 votes
public DataSet next(int batchSize, int exampleNum) {
    List<DataSet> temp = new ArrayList<>();
    DataSet result;
    if (cifarProcessedFilesExists() && useSpecialPreProcessCifar) {
        if (exampleNum == 0 || ((exampleNum / fileNum) == numToConvertDS && train)) {
            fileNum++;
            if (train)
                loadDS.load(new File(trainFilesSerialized + fileNum + ".ser"));
            loadDS.load(new File(testFilesSerialized));
            // Shuffle all examples in file before batching happens also for each reset
            if (shuffle && batchSize > 1)
                loadDS.shuffle(seed);
            loadDSIndex = 0;
            //          inputBatched = loadDS.batchBy(batchSize);
        }
        // TODO loading full train dataset when using cuda causes memory error - find way to load into list off gpu
        //            result = inputBatched.get(batchNum);
        for (int i = 0; i < batchSize; i++) {
            if (loadDS.get(loadDSIndex) != null)
                temp.add(loadDS.get(loadDSIndex));
            else
                break;
            loadDSIndex++;
        }
        if (temp.size() > 1)
            result = DataSet.merge(temp);
        else
            result = temp.get(0);
    } else {
        result = convertDataSet(batchSize);
    }
    return result;
}
 
Example 8
Source File: SpecialTests.java    From deeplearning4j with Apache License 2.0    5 votes
@Test(expected = ND4JIllegalStateException.class)
public void testScalarShuffle1() {
    List<DataSet> listData = new ArrayList<>();
    for (int i = 0; i < 3; i++) {
        INDArray features = Nd4j.ones(25, 25);
        INDArray label = Nd4j.create(new float[] {1}, new int[] {1});
        DataSet dataset = new DataSet(features, label);
        listData.add(dataset);
    }
    DataSet data = DataSet.merge(listData);
    data.shuffle();
}
 
Example 9
Source File: LoneTest.java    From deeplearning4j with Apache License 2.0    5 votes
@Test
public void maskWhenMerge() {
    DataSet dsA = new DataSet(Nd4j.linspace(1, 15, 15).reshape(1, 3, 5), Nd4j.zeros(1, 3, 5));
    DataSet dsB = new DataSet(Nd4j.linspace(1, 9, 9).reshape(1, 3, 3), Nd4j.zeros(1, 3, 3));
    List<DataSet> dataSetList = new ArrayList<DataSet>();
    dataSetList.add(dsA);
    dataSetList.add(dsB);
    DataSet fullDataSet = DataSet.merge(dataSetList);
    assertTrue(fullDataSet.getFeaturesMaskArray() != null);

    DataSet fullDataSetCopy = fullDataSet.copy();
    assertTrue(fullDataSetCopy.getFeaturesMaskArray() != null);

}
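Note on the example above: dsA has 5 time steps and dsB has 3, so DataSet.merge() pads the shorter sequence and attaches mask arrays, which is why getFeaturesMaskArray() returns non-null. A sketch of what to expect from the merged dataset (the shapes reflect my reading of the merge semantics and are stated as assumptions, not verified output):

    // features are padded out to the longest sequence in the merge
    INDArray mergedFeatures = fullDataSet.getFeatures();        // shape [2, 3, 5]
    // the mask is per example and time step: 1 = real data, 0 = padding
    INDArray mask = fullDataSet.getFeaturesMaskArray();         // shape [2, 5]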
 
Example 10
Source File: UnderSamplingPreProcessorTest.java    From deeplearning4j with Apache License 2.0    5 votes
public DataSet knownDistVariedDataSet(float[] dist, boolean twoClass) {
    //construct a dataset with known distribution of minority class and varying time steps
    DataSet batchATimeSteps = makeDataSetSameL(minibatchSize, shortSeq, dist, twoClass);
    DataSet batchBTimeSteps = makeDataSetSameL(minibatchSize, longSeq, dist, twoClass);
    List<DataSet> listofbatches = new ArrayList<>();
    listofbatches.add(batchATimeSteps);
    listofbatches.add(batchBTimeSteps);
    return DataSet.merge(listofbatches);
}
 
Example 11
Source File: CifarLoader.java    From deeplearning4j with Apache License 2.0    5 votes
public DataSet next(int batchSize, int exampleNum) {
    List<DataSet> temp = new ArrayList<>();
    DataSet result;
    if (cifarProcessedFilesExists() && useSpecialPreProcessCifar) {
        if (exampleNum == 0 || ((exampleNum / fileNum) == numToConvertDS && train)) {
            fileNum++;
            if (train)
                loadDS.load(new File(trainFilesSerialized + fileNum + ".ser"));
            loadDS.load(new File(testFilesSerialized));
            // Shuffle all examples in file before batching happens also for each reset
            if (shuffle && batchSize > 1)
                loadDS.shuffle(seed);
            loadDSIndex = 0;
            //          inputBatched = loadDS.batchBy(batchSize);
        }
        // TODO loading full train dataset when using cuda causes memory error - find way to load into list off gpu
        //            result = inputBatched.get(batchNum);
        for (int i = 0; i < batchSize; i++) {
            if (loadDS.get(loadDSIndex) != null)
                temp.add(loadDS.get(loadDSIndex));
            else
                break;
            loadDSIndex++;
        }
        if (temp.size() > 1)
            result = DataSet.merge(temp);
        else
            result = temp.get(0);
    } else {
        result = convertDataSet(batchSize);
    }
    return result;
}
 
Example 12
Source File: SpecialTests.java    From nd4j with Apache License 2.0    5 votes
@Test(expected = ND4JIllegalStateException.class)
public void testScalarShuffle1() throws Exception {
    List<DataSet> listData = new ArrayList<>();
    for (int i = 0; i < 3; i++) {
        INDArray features = Nd4j.ones(25, 25);
        INDArray label = Nd4j.create(new float[] {1}, new int[] {1});
        DataSet dataset = new DataSet(features, label);
        listData.add(dataset);
    }
    DataSet data = DataSet.merge(listData);
    data.shuffle();
}
 
Example 13
Source File: LoneTest.java    From nd4j with Apache License 2.0    5 votes
@Test
public void maskWhenMerge() {
    DataSet dsA = new DataSet(Nd4j.linspace(1, 15, 15).reshape(1, 3, 5), Nd4j.zeros(1, 3, 5));
    DataSet dsB = new DataSet(Nd4j.linspace(1, 9, 9).reshape(1, 3, 3), Nd4j.zeros(1, 3, 3));
    List<DataSet> dataSetList = new ArrayList<DataSet>();
    dataSetList.add(dsA);
    dataSetList.add(dsB);
    DataSet fullDataSet = DataSet.merge(dataSetList);
    assertTrue(fullDataSet.getFeaturesMaskArray() != null);

    DataSet fullDataSetCopy = fullDataSet.copy();
    assertTrue(fullDataSetCopy.getFeaturesMaskArray() != null);

}
 
Example 14
Source File: UnderSamplingPreProcessorTest.java    From nd4j with Apache License 2.0    5 votes
public DataSet knownDistVariedDataSet(float[] dist, boolean twoClass) {
    //construct a dataset with known distribution of minority class and varying time steps
    DataSet batchATimeSteps = makeDataSetSameL(minibatchSize, shortSeq, dist, twoClass);
    DataSet batchBTimeSteps = makeDataSetSameL(minibatchSize, longSeq, dist, twoClass);
    List<DataSet> listofbatches = new ArrayList<>();
    listofbatches.add(batchATimeSteps);
    listofbatches.add(batchBTimeSteps);
    return DataSet.merge(listofbatches);
}
 
Example 15
Source File: TestCompareParameterAveragingSparkVsSingleMachine.java    From deeplearning4j with Apache License 2.0    4 votes
private DataSet getOneDataSet(int totalExamples, int seed) {
    return DataSet.merge(getOneDataSetAsIndividalExamples(totalExamples, seed));
}
 
Example 16
Source File: FileDataSetIterator.java    From deeplearning4j with Apache License 2.0    4 votes
@Override
protected DataSet merge(List<DataSet> toMerge) {
    return DataSet.merge(toMerge);
}
 
Example 17
Source File: GradientSharingTrainingTest.java    From deeplearning4j with Apache License 2.0    4 votes
@Test @Ignore //AB https://github.com/eclipse/deeplearning4j/issues/8985
public void differentNetsTrainingTest() throws Exception {
    int batch = 3;

    File temp = testDir.newFolder();
    DataSet ds = new IrisDataSetIterator(150, 150).next();
    List<DataSet> list = ds.asList();
    Collections.shuffle(list, new Random(12345));
    int pos = 0;
    int dsCount = 0;
    while (pos < list.size()) {
        List<DataSet> l2 = new ArrayList<>();
        for (int i = 0; i < 3 && pos < list.size(); i++) {
            l2.add(list.get(pos++));
        }
        DataSet d = DataSet.merge(l2);
        File f = new File(temp, dsCount++ + ".bin");
        d.save(f);
    }

    INDArray last = null;
    INDArray lastDup = null;
    for (int i = 0; i < 2; i++) {
        System.out.println("||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||");
        log.info("Starting: {}", i);

        MultiLayerConfiguration conf;
        if (i == 0) {
            conf = new NeuralNetConfiguration.Builder()
                    .weightInit(WeightInit.XAVIER)
                    .seed(12345)
                    .list()
                    .layer(new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build())
                    .build();
        } else {
            conf = new NeuralNetConfiguration.Builder()
                    .weightInit(WeightInit.XAVIER)
                    .seed(12345)
                    .list()
                    .layer(new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build())
                    .layer(new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build())
                    .build();
        }
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();


        //TODO this probably won't work everywhere...
        String controller = Inet4Address.getLocalHost().getHostAddress();
        String networkMask = controller.substring(0, controller.lastIndexOf('.')) + ".0" + "/16";

        VoidConfiguration voidConfiguration = VoidConfiguration.builder()
                .unicastPort(40123) // Should be open for IN/OUT communications on all Spark nodes
                .networkMask(networkMask) // Local network mask
                .controllerAddress(controller)
                .build();
        TrainingMaster tm = new SharedTrainingMaster.Builder(voidConfiguration, 2, new FixedThresholdAlgorithm(1e-4), batch)
                .rngSeed(12345)
                .collectTrainingStats(false)
                .batchSizePerWorker(batch) // Minibatch size for each worker
                .workersPerNode(2) // Workers per node
                .build();


        SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, net, tm);

        //System.out.println(Arrays.toString(sparkNet.getNetwork().params().get(NDArrayIndex.point(0), NDArrayIndex.interval(0, 256)).dup().data().asFloat()));

        String fitPath = "file:///" + temp.getAbsolutePath().replaceAll("\\\\", "/");
        INDArray paramsBefore = net.params().dup();
        for( int j=0; j<3; j++ ) {
            sparkNet.fit(fitPath);
        }

        INDArray paramsAfter = net.params();
        assertNotEquals(paramsBefore, paramsAfter);

        //Also check we don't have any issues
        if(i == 0) {
            last = sparkNet.getNetwork().params();
            lastDup = last.dup();
        } else {
            assertEquals(lastDup, last);
        }
    }
}
 
Example 18
Source File: ScoreExamplesWithKeyFunction.java    From deeplearning4j with Apache License 2.0    4 votes
@Override
public Iterator<Tuple2<K, Double>> call(Iterator<Tuple2<K, DataSet>> iterator) throws Exception {
    if (!iterator.hasNext()) {
        return Collections.emptyIterator();
    }

    MultiLayerNetwork network = new MultiLayerNetwork(MultiLayerConfiguration.fromJson(jsonConfig.getValue()));
    network.init();
    INDArray val = params.value().unsafeDuplication();
    if (val.length() != network.numParams(false))
        throw new IllegalStateException(
                        "Network did not have same number of parameters as the broadcast set parameters");
    network.setParameters(val);

    List<Tuple2<K, Double>> ret = new ArrayList<>();

    List<DataSet> collect = new ArrayList<>(batchSize);
    List<K> collectKey = new ArrayList<>(batchSize);
    int totalCount = 0;
    while (iterator.hasNext()) {
        collect.clear();
        collectKey.clear();
        int nExamples = 0;
        while (iterator.hasNext() && nExamples < batchSize) {
            Tuple2<K, DataSet> t2 = iterator.next();
            DataSet ds = t2._2();
            int n = ds.numExamples();
            if (n != 1)
                throw new IllegalStateException("Cannot score examples with one key per data set if "
                                + "data set contains more than 1 example (numExamples: " + n + ")");
            collect.add(ds);
            collectKey.add(t2._1());
            nExamples += n;
        }
        totalCount += nExamples;

        DataSet data = DataSet.merge(collect);


        INDArray scores = network.scoreExamples(data, addRegularization);
        double[] doubleScores = scores.data().asDouble();

        for (int i = 0; i < doubleScores.length; i++) {
            ret.add(new Tuple2<>(collectKey.get(i), doubleScores[i]));
        }
    }

    Nd4j.getExecutioner().commit();

    if (log.isDebugEnabled()) {
        log.debug("Scored {} examples ", totalCount);
    }

    return ret.iterator();
}
 
Example 19
Source File: ScoreExamplesFunction.java    From deeplearning4j with Apache License 2.0    4 votes
@Override
public Iterator<Double> call(Iterator<DataSet> iterator) throws Exception {
    if (!iterator.hasNext()) {
        return Collections.emptyIterator();
    }

    MultiLayerNetwork network = new MultiLayerNetwork(MultiLayerConfiguration.fromJson(jsonConfig.getValue()));
    network.init();
    INDArray val = params.value().unsafeDuplication();
    if (val.length() != network.numParams(false))
        throw new IllegalStateException(
                        "Network did not have same number of parameters as the broadcast set parameters");
    network.setParameters(val);

    List<Double> ret = new ArrayList<>();

    List<DataSet> collect = new ArrayList<>(batchSize);
    int totalCount = 0;
    while (iterator.hasNext()) {
        collect.clear();
        int nExamples = 0;
        while (iterator.hasNext() && nExamples < batchSize) {
            DataSet ds = iterator.next();
            int n = ds.numExamples();
            collect.add(ds);
            nExamples += n;
        }
        totalCount += nExamples;

        DataSet data = DataSet.merge(collect);


        INDArray scores = network.scoreExamples(data, addRegularization);
        double[] doubleScores = scores.data().asDouble();

        for (double doubleScore : doubleScores) {
            ret.add(doubleScore);
        }
    }

    Nd4j.getExecutioner().commit();

    if (log.isDebugEnabled()) {
        log.debug("Scored {} examples ", totalCount);
    }

    return ret.iterator();
}