org.datavec.local.transforms.LocalTransformExecutor Java Examples

The following examples show how to use org.datavec.local.transforms.LocalTransformExecutor. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestPythonTransformProcess.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a {@code PythonTransform} over one record holding a single 1x1 NDArray column "a" and
 * asserts that the first output value is the array 3 (input 1 after {@code a += 2}) and the
 * second output value is the string assigned to {@code b} in the Python snippet.
 */
@Test
public void testNumpyTransform() {
    PythonTransform transform = PythonTransform.builder()
            .code("a += 2; b = 'hello world'")
            .returnAllInputs(true)
            .build();

    // One record containing one NDArray value.
    List<List<Writable>> records = new ArrayList<>();
    records.add(Arrays.<Writable>asList(new NDArrayWritable(Nd4j.scalar(1).reshape(1, 1))));

    Schema schema = new Builder()
            .addColumnNDArray("a", new long[]{1, 1})
            .build();
    TransformProcess process = new TransformProcess.Builder(schema)
            .transform(transform)
            .build();

    List<List<Writable>> result = LocalTransformExecutor.execute(records, process);
    assertFalse(result.isEmpty());

    List<Writable> row = result.get(0);
    assertNotNull(row);
    assertNotNull(row.get(0));
    assertNotNull(row.get(1));
    assertEquals(Nd4j.scalar(3).reshape(1, 1), ((NDArrayWritable) row.get(0)).get());
    assertEquals("hello world", row.get(1).toString());
}
 
Example #2
Source File: TestPythonTransformProcess.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a {@code PythonTransform} built without an explicit output schema over one record with
 * an integer column "a", then asserts the first output value is 3 and the second is the
 * string assigned to {@code b} in the Python snippet.
 *
 * @throws Exception if building or executing the transform fails
 */
@Test
public void testPythonTransformNoOutputSpecified() throws Exception {
    PythonTransform transform = PythonTransform.builder()
            .code("a += 2; b = 'hello world'")
            .returnAllInputs(true)
            .build();

    // One record containing one integer value.
    List<List<Writable>> records = new ArrayList<>();
    records.add(Arrays.<Writable>asList(new IntWritable(1)));

    Schema schema = new Builder()
            .addColumnInteger("a")
            .build();
    TransformProcess process = new TransformProcess.Builder(schema)
            .transform(transform)
            .build();

    List<List<Writable>> result = LocalTransformExecutor.execute(records, process);
    List<Writable> row = result.get(0);
    assertEquals(3, row.get(0).toInt());
    assertEquals("hello world", row.get(1).toString());
}
 
Example #3
Source File: LocalTransformFunction.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Applies the wrapped transform to a single record.
 *
 * <p>When {@code LocalTransformExecutor.isTryCatch()} returns true, an exception thrown by the
 * transform is logged as a warning and an empty list is returned in place of the record.
 * Otherwise the exception propagates to the caller.
 *
 * @param v1 the record to transform
 * @return the value of {@code transform.map(v1)}, or an empty list when try/catch mode is on
 *         and the transform threw
 */
@Override
public List<Writable> apply(List<Writable> v1) {
    if (!LocalTransformExecutor.isTryCatch()) {
        return transform.map(v1);
    }
    try {
        return transform.map(v1);
    } catch (Exception e) {
        // Same message as before; the exception is also passed as the throwable argument so
        // the logger can record its stack trace instead of only its toString().
        log.warn("Error occurred " + e + " on record " + v1, e);
        return new ArrayList<>();
    }
}
 
Example #4
Source File: ExecutionTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Executes a {@code PythonTransform} that applies {@code np.sin} to column "first" and
 * {@code np.cos} to column "second", then compares both outputs with
 * {@code Transforms.sin} / {@code Transforms.cos} computed on the same inputs.
 *
 * @throws Exception if building or executing the transform fails
 */
@Test(timeout = 60000L)
@Ignore("AB 2019/05/21 - Fine locally, timeouts on CI - Issue #7657 and #7771")
public void testPythonExecutionNdarray() throws Exception {
    Schema ndArraySchema = new Schema.Builder()
            .addColumnNDArray("first", new long[]{1, 32577})
            .addColumnNDArray("second", new long[]{1, 32577})
            .build();

    // The same schema is used for the input and as the declared output of the Python step.
    TransformProcess process = new TransformProcess.Builder(ndArraySchema)
            .transform(PythonTransform.builder()
                    .code("first = np.sin(first)\nsecond = np.cos(second)")
                    .outputSchema(ndArraySchema)
                    .build())
            .build();

    INDArray sinInput = Nd4j.linspace(1, 4, 4);
    INDArray cosInput = Nd4j.linspace(1, 4, 4);

    List<List<Writable>> records = new ArrayList<>();
    List<Writable> row = new ArrayList<>();
    row.add(new NDArrayWritable(sinInput));
    row.add(new NDArrayWritable(cosInput));
    records.add(row);

    List<List<Writable>> result = LocalTransformExecutor.execute(records, process);
    List<Writable> outRow = result.get(0);
    INDArray sinActual = ((NDArrayWritable) outRow.get(0)).get();
    INDArray cosActual = ((NDArrayWritable) outRow.get(1)).get();

    assertEquals(Transforms.sin(sinInput), sinActual);
    assertEquals(Transforms.cos(cosInput), cosActual);
}
 
Example #5
Source File: ExecutionTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reduces two records without key columns (take-first on the text column, mean on the double
 * column) and asserts that exactly one record, ("first", 4.0), is produced.
 */
@Test
public void testReductionGlobal() {
    Schema schema = new Schema.Builder()
            .addColumnString("textCol")
            .addColumnDouble("doubleCol")
            .build();

    Reducer.Builder reducer = new Reducer.Builder(ReduceOp.TakeFirst)
            .takeFirstColumns("textCol")
            .meanColumns("doubleCol");
    TransformProcess process = new TransformProcess.Builder(schema)
            .reduce(reducer.build())
            .build();

    List<List<Writable>> input = Arrays.asList(
            Arrays.<Writable>asList(new Text("first"), new DoubleWritable(3.0)),
            Arrays.<Writable>asList(new Text("second"), new DoubleWritable(5.0)));

    List<List<Writable>> actual = LocalTransformExecutor.execute(input, process);

    // Mean of 3.0 and 5.0 paired with the first text value.
    List<List<Writable>> expected = Collections.singletonList(
            Arrays.<Writable>asList(new Text("first"), new DoubleWritable(4.0)));

    assertEquals(expected, actual);
}
 
Example #6
Source File: ExecutionTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Applies a filter with the condition "col1 LessThan 1" to three records and asserts that
 * two records remain after execution.
 */
@Test
public void testFilter() {
    Schema schema = new Schema.Builder()
            .addColumnDouble("col1")
            .addColumnDouble("col2")
            .addColumnDouble("col3")
            .build();

    List<List<Writable>> records = new ArrayList<>();
    records.add(Arrays.<Writable>asList(new IntWritable(0), new DoubleWritable(1), new DoubleWritable(0.1)));
    records.add(Arrays.<Writable>asList(new IntWritable(1), new DoubleWritable(3), new DoubleWritable(1.1)));
    records.add(Arrays.<Writable>asList(new IntWritable(2), new DoubleWritable(3), new DoubleWritable(2.1)));

    DoubleColumnCondition condition = new DoubleColumnCondition("col1", ConditionOp.LessThan, 1);
    TransformProcess process = new TransformProcess.Builder(schema)
            .filter(condition)
            .build();

    List<List<Writable>> remaining = LocalTransformExecutor.execute(records, process);
    assertEquals(2, remaining.size());
}
 
Example #7
Source File: ExecutionTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Executes a process that converts the categorical column to an integer, adds 10.0 to the
 * double column and 5f to the float column, then compares the output (sorted by the first
 * column) with the expected records.
 */
@Test
public void testExecutionSimple() {
    Schema schema = new Schema.Builder()
            .addColumnInteger("col0")
            .addColumnCategorical("col1", "state0", "state1", "state2")
            .addColumnDouble("col2")
            .addColumnFloat("col3")
            .build();

    TransformProcess process = new TransformProcess.Builder(schema)
            .categoricalToInteger("col1")
            .doubleMathOp("col2", MathOp.Add, 10.0)
            .floatMathOp("col3", MathOp.Add, 5f)
            .build();

    List<List<Writable>> records = new ArrayList<>();
    records.add(Arrays.<Writable>asList(new IntWritable(0), new Text("state2"), new DoubleWritable(0.1), new FloatWritable(0.3f)));
    records.add(Arrays.<Writable>asList(new IntWritable(1), new Text("state1"), new DoubleWritable(1.1), new FloatWritable(1.7f)));
    records.add(Arrays.<Writable>asList(new IntWritable(2), new Text("state0"), new DoubleWritable(2.1), new FloatWritable(3.6f)));

    // Copy the result so it can be sorted into a known order before comparing.
    List<List<Writable>> actual = new ArrayList<>(LocalTransformExecutor.execute(records, process));

    Comparator<List<Writable>> byFirstColumn = new Comparator<List<Writable>>() {
        @Override
        public int compare(List<Writable> left, List<Writable> right) {
            return Integer.compare(left.get(0).toInt(), right.get(0).toInt());
        }
    };
    Collections.sort(actual, byFirstColumn);

    List<List<Writable>> expected = new ArrayList<>();
    expected.add(Arrays.<Writable>asList(new IntWritable(0), new IntWritable(2), new DoubleWritable(10.1), new FloatWritable(5.3f)));
    expected.add(Arrays.<Writable>asList(new IntWritable(1), new IntWritable(1), new DoubleWritable(11.1), new FloatWritable(6.7f)));
    expected.add(Arrays.<Writable>asList(new IntWritable(2), new IntWritable(0), new DoubleWritable(12.1), new FloatWritable(8.6f)));

    assertEquals(expected, actual);
}
 
Example #8
Source File: ExecutionTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Applies {@code MathFunction.SIN} to NDArray column "first" and {@code MathFunction.COS} to
 * column "second", then compares both outputs with {@code Transforms.sin} /
 * {@code Transforms.cos} computed on the same inputs.
 */
@Test
public void testExecutionNdarray() {
    Schema ndArraySchema = new Schema.Builder()
            .addColumnNDArray("first", new long[]{1, 32577})
            .addColumnNDArray("second", new long[]{1, 32577})
            .build();

    TransformProcess process = new TransformProcess.Builder(ndArraySchema)
            .ndArrayMathFunctionTransform("first", MathFunction.SIN)
            .ndArrayMathFunctionTransform("second", MathFunction.COS)
            .build();

    INDArray sinInput = Nd4j.linspace(1, 4, 4);
    INDArray cosInput = Nd4j.linspace(1, 4, 4);

    List<List<Writable>> records = new ArrayList<>();
    List<Writable> row = new ArrayList<>();
    row.add(new NDArrayWritable(sinInput));
    row.add(new NDArrayWritable(cosInput));
    records.add(row);

    List<List<Writable>> result = LocalTransformExecutor.execute(records, process);
    List<Writable> outRow = result.get(0);
    INDArray sinActual = ((NDArrayWritable) outRow.get(0)).get();
    INDArray cosActual = ((NDArrayWritable) outRow.get(1)).get();

    assertEquals(Transforms.sin(sinInput), sinActual);
    assertEquals(Transforms.cos(cosInput), cosActual);
}
 
Example #9
Source File: TestCalculateSortedRank.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a "rank" column computed from "DoubleCol" and checks the final schema (names and
 * types), the number and width of the output records, and that each record's rank equals
 * the number stored in its text column.
 */
@Test
public void testCalculateSortedRank() {
    // The text value of each record is the rank expected for its double value.
    List<List<Writable>> records = new ArrayList<>();
    records.add(Arrays.<Writable>asList(new Text("0"), new DoubleWritable(0.0)));
    records.add(Arrays.<Writable>asList(new Text("3"), new DoubleWritable(0.3)));
    records.add(Arrays.<Writable>asList(new Text("2"), new DoubleWritable(0.2)));
    records.add(Arrays.<Writable>asList(new Text("1"), new DoubleWritable(0.1)));

    Schema schema = new Schema.Builder()
            .addColumnsString("TextCol")
            .addColumnDouble("DoubleCol")
            .build();

    TransformProcess process = new TransformProcess.Builder(schema)
            .calculateSortedRank("rank", "DoubleCol", new DoubleWritableComparator())
            .build();

    Schema finalSchema = process.getFinalSchema();
    assertEquals(3, finalSchema.numColumns());
    assertEquals(Arrays.asList("TextCol", "DoubleCol", "rank"), finalSchema.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.String, ColumnType.Double, ColumnType.Long), finalSchema.getColumnTypes());

    List<List<Writable>> ranked = LocalTransformExecutor.execute(records, process);
    assertEquals(4, ranked.size());

    for (List<Writable> row : ranked) {
        assertEquals(3, row.size());
    }

    for (List<Writable> row : ranked) {
        int expectedRank = row.get(0).toInt();
        int actualRank = row.get(2).toInt();
        assertEquals(expectedRank, actualRank);
    }
}
 
Example #10
Source File: TestConvertToSequence.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Converts three records to sequences with {@code convertToSequence()} (no key columns given)
 * and asserts that three sequences come back, each input record appearing as a sequence of
 * exactly one step.
 */
@Test
public void testConvertToSequenceLength1() {
    Schema schema = new Schema.Builder()
            .addColumnsString("string")
            .addColumnLong("long")
            .build();

    TransformProcess process = new TransformProcess.Builder(schema)
            .convertToSequence()
            .build();

    List<List<Writable>> records = Arrays.asList(
            Arrays.<Writable>asList(new Text("a"), new LongWritable(0)),
            Arrays.<Writable>asList(new Text("b"), new LongWritable(1)),
            Arrays.<Writable>asList(new Text("c"), new LongWritable(2)));

    ArrowWritableRecordTimeSeriesBatch batch =
            (ArrowWritableRecordTimeSeriesBatch) LocalTransformExecutor.executeToSequence(records, process);
    List<List<List<Writable>>> sequences = batch.toArrayList();

    assertEquals(3, sequences.size());

    // Each record must show up wrapped as a single-step sequence.
    for (List<Writable> record : records) {
        assertTrue(sequences.contains(Collections.singletonList(record)));
    }
}
 
Example #11
Source File: TestPythonTransformProcess.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a {@code PythonTransform} in setup-and-run mode: the Python {@code setup()} sets a
 * global to 5 and {@code run(a, b)} returns {@code a + b + five} as "c". With inputs 1 and 2
 * the test asserts that the value at output index 3 is the 1x1 array 8.
 *
 * @throws Exception if building or executing the transform fails
 */
@Test
public void testWithSetupRun() throws Exception {
    PythonTransform transform = PythonTransform.builder()
            .code("five=None\n" +
                    "def setup():\n" +
                    "    global five\n"+
                    "    five = 5\n\n" +
                    "def run(a, b):\n" +
                    "    c = a + b + five\n"+
                    "    return {'c':c}\n\n")
            .returnAllInputs(true)
            .setupAndRun(true)
            .build();

    // One record with two 1x1 NDArray values: a = 1, b = 2.
    List<List<Writable>> records = new ArrayList<>();
    records.add(Arrays.<Writable>asList(
            new NDArrayWritable(Nd4j.scalar(1).reshape(1, 1)),
            new NDArrayWritable(Nd4j.scalar(2).reshape(1, 1))));

    Schema schema = new Builder()
            .addColumnNDArray("a", new long[]{1, 1})
            .addColumnNDArray("b", new long[]{1, 1})
            .build();
    TransformProcess process = new TransformProcess.Builder(schema)
            .transform(transform)
            .build();

    List<List<Writable>> result = LocalTransformExecutor.execute(records, process);
    assertFalse(result.isEmpty());

    List<Writable> row = result.get(0);
    assertNotNull(row);
    assertNotNull(row.get(0));
    assertEquals(Nd4j.scalar(8).reshape(1, 1), ((NDArrayWritable) row.get(3)).get());
}
 
Example #12
Source File: TransformProcessStepRunner.java    From konduit-serving with Apache License 2.0 5 votes vote down vote up
/**
 * Transforms each input record whose input name is valid for this pipeline step by running
 * the transform process registered under that name; other records are passed through as-is.
 *
 * @param input the records to process, one per input position
 * @return a new array of the same length holding the transformed or passed-through records
 */
@Override
public Record[] transform(Record[] input) {
    Record[] results = new Record[input.length];
    for (int idx = 0; idx < input.length; idx++) {
        if (!pipelineStep.inputNameIsValidForStep(pipelineStep.inputNameAt(idx))) {
            // Name is not handled by this step: keep the record unchanged.
            results[idx] = input[idx];
            continue;
        }

        TransformProcess process = transformProcesses.get(pipelineStep.inputNameAt(idx));
        Preconditions.checkNotNull(process, "No transform process found for name " + (pipelineStep.inputNameAt(idx)));

        // The executor works on a list of records, so wrap the single record and unwrap the result.
        List<Writable> transformed =
                LocalTransformExecutor.execute(Arrays.asList(input[idx].getRecord()), process).get(0);
        results[idx] = new org.datavec.api.records.impl.Record(transformed, null);
    }
    return results;
}
 
Example #13
Source File: ExecutionTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Reduces two records without key columns (take-first on the text column, mean on the double
 * column) and asserts that exactly one record, ("first", 4.0), is produced.
 */
@Test
public void testReductionGlobal() {
    List<Writable> firstRow = Arrays.<Writable>asList(new Text("first"), new DoubleWritable(3.0));
    List<Writable> secondRow = Arrays.<Writable>asList(new Text("second"), new DoubleWritable(5.0));
    List<List<Writable>> input = Arrays.asList(firstRow, secondRow);

    Schema schema = new Schema.Builder()
            .addColumnString("textCol")
            .addColumnDouble("doubleCol")
            .build();

    TransformProcess process = new TransformProcess.Builder(schema)
            .reduce(new Reducer.Builder(ReduceOp.TakeFirst)
                    .takeFirstColumns("textCol")
                    .meanColumns("doubleCol")
                    .build())
            .build();

    List<List<Writable>> actual = LocalTransformExecutor.execute(input, process);

    // Mean of 3.0 and 5.0 paired with the first text value.
    List<Writable> expectedRow = Arrays.<Writable>asList(new Text("first"), new DoubleWritable(4.0));
    List<List<Writable>> expected = Collections.singletonList(expectedRow);

    assertEquals(expected, actual);
}
 
Example #14
Source File: LocalTransformFunction.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Applies the wrapped transform to a single record.
 *
 * <p>When {@code LocalTransformExecutor.isTryCatch()} returns true, an exception thrown by the
 * transform is logged as a warning and an empty list is returned in place of the record.
 * Otherwise the exception propagates to the caller.
 *
 * @param v1 the record to transform
 * @return the value of {@code transform.map(v1)}, or an empty list when try/catch mode is on
 *         and the transform threw
 */
@Override
public List<Writable> apply(List<Writable> v1) {
    if (!LocalTransformExecutor.isTryCatch()) {
        return transform.map(v1);
    }
    try {
        return transform.map(v1);
    } catch (Exception e) {
        // Same message as before; the exception is also passed as the throwable argument so
        // the logger can record its stack trace instead of only its toString().
        log.warn("Error occurred " + e + " on record " + v1, e);
        return new ArrayList<>();
    }
}
 
Example #15
Source File: ExecutionTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Applies a filter with the condition "col1 LessThan 1" to three records and asserts that
 * two records remain after execution.
 */
@Test
public void testFilter() {
    Schema schema = new Schema.Builder()
            .addColumnDouble("col1")
            .addColumnDouble("col2")
            .addColumnDouble("col3")
            .build();

    TransformProcess process = new TransformProcess.Builder(schema)
            .filter(new DoubleColumnCondition("col1", ConditionOp.LessThan, 1))
            .build();

    List<List<Writable>> records = new ArrayList<>();
    records.add(Arrays.<Writable>asList(new IntWritable(0), new DoubleWritable(1), new DoubleWritable(0.1)));
    records.add(Arrays.<Writable>asList(new IntWritable(1), new DoubleWritable(3), new DoubleWritable(1.1)));
    records.add(Arrays.<Writable>asList(new IntWritable(2), new DoubleWritable(3), new DoubleWritable(2.1)));

    List<List<Writable>> remaining = LocalTransformExecutor.execute(records, process);
    assertEquals(2, remaining.size());
}
 
Example #16
Source File: ExecutionTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Executes a process that converts the categorical column to an integer and adds 10.0 to the
 * double column, then compares the output (sorted by the first column) with the expected
 * records.
 */
@Test
public void testExecutionSimple() {
    Schema schema = new Schema.Builder()
            .addColumnInteger("col0")
            .addColumnCategorical("col1", "state0", "state1", "state2")
            .addColumnDouble("col2")
            .build();

    TransformProcess process = new TransformProcess.Builder(schema)
            .categoricalToInteger("col1")
            .doubleMathOp("col2", MathOp.Add, 10.0)
            .build();

    List<List<Writable>> records = new ArrayList<>();
    records.add(Arrays.<Writable>asList(new IntWritable(0), new Text("state2"), new DoubleWritable(0.1)));
    records.add(Arrays.<Writable>asList(new IntWritable(1), new Text("state1"), new DoubleWritable(1.1)));
    records.add(Arrays.<Writable>asList(new IntWritable(2), new Text("state0"), new DoubleWritable(2.1)));

    // Copy the result so it can be sorted into a known order before comparing.
    List<List<Writable>> actual = new ArrayList<>(LocalTransformExecutor.execute(records, process));

    Comparator<List<Writable>> byFirstColumn = new Comparator<List<Writable>>() {
        @Override
        public int compare(List<Writable> left, List<Writable> right) {
            return Integer.compare(left.get(0).toInt(), right.get(0).toInt());
        }
    };
    Collections.sort(actual, byFirstColumn);

    List<List<Writable>> expected = new ArrayList<>();
    expected.add(Arrays.<Writable>asList(new IntWritable(0), new IntWritable(2), new DoubleWritable(10.1)));
    expected.add(Arrays.<Writable>asList(new IntWritable(1), new IntWritable(1), new DoubleWritable(11.1)));
    expected.add(Arrays.<Writable>asList(new IntWritable(2), new IntWritable(0), new DoubleWritable(12.1)));

    assertEquals(expected, actual);
}
 
Example #17
Source File: TestCalculateSortedRank.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a "rank" column computed from "DoubleCol" and checks the final schema (names and
 * types), the number and width of the output records, and that each record's rank equals
 * the number stored in its text column.
 */
@Test
public void testCalculateSortedRank() {
    Schema schema = new Schema.Builder()
            .addColumnsString("TextCol")
            .addColumnDouble("DoubleCol")
            .build();

    TransformProcess process = new TransformProcess.Builder(schema)
            .calculateSortedRank("rank", "DoubleCol", new DoubleWritableComparator())
            .build();

    // Schema checks first: three columns, with "rank" appended as a Long column.
    Schema finalSchema = process.getFinalSchema();
    assertEquals(3, finalSchema.numColumns());
    assertEquals(Arrays.asList("TextCol", "DoubleCol", "rank"), finalSchema.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.String, ColumnType.Double, ColumnType.Long), finalSchema.getColumnTypes());

    // The text value of each record is the rank expected for its double value.
    List<List<Writable>> records = new ArrayList<>();
    records.add(Arrays.<Writable>asList(new Text("0"), new DoubleWritable(0.0)));
    records.add(Arrays.<Writable>asList(new Text("3"), new DoubleWritable(0.3)));
    records.add(Arrays.<Writable>asList(new Text("2"), new DoubleWritable(0.2)));
    records.add(Arrays.<Writable>asList(new Text("1"), new DoubleWritable(0.1)));

    List<List<Writable>> ranked = LocalTransformExecutor.execute(records, process);
    assertEquals(4, ranked.size());

    for (List<Writable> row : ranked) {
        assertEquals(3, row.size());
    }

    for (List<Writable> row : ranked) {
        int expectedRank = row.get(0).toInt();
        int actualRank = row.get(2).toInt();
        assertEquals(expectedRank, actualRank);
    }
}
 
Example #18
Source File: TestConvertToSequence.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Converts three records to sequences with {@code convertToSequence()} (no key columns given)
 * and asserts that three sequences come back, each input record appearing as a sequence of
 * exactly one step.
 */
@Test
public void testConvertToSequenceLength1() {
    List<List<Writable>> records = Arrays.asList(
            Arrays.<Writable>asList(new Text("a"), new LongWritable(0)),
            Arrays.<Writable>asList(new Text("b"), new LongWritable(1)),
            Arrays.<Writable>asList(new Text("c"), new LongWritable(2)));

    Schema schema = new Schema.Builder()
            .addColumnsString("string")
            .addColumnLong("long")
            .build();

    TransformProcess process = new TransformProcess.Builder(schema)
            .convertToSequence()
            .build();

    ArrowWritableRecordTimeSeriesBatch batch =
            (ArrowWritableRecordTimeSeriesBatch) LocalTransformExecutor.executeToSequence(records, process);
    List<List<List<Writable>>> sequences = batch.toArrayList();

    assertEquals(3, sequences.size());

    // Each record must show up wrapped as a single-step sequence.
    for (List<Writable> record : records) {
        assertTrue(sequences.contains(Collections.singletonList(record)));
    }
}
 
Example #19
Source File: LocalExecuteExample.java    From Java-Deep-Learning-Cookbook with MIT License 4 votes vote down vote up
/**
 * Reads every record from the Titanic CSV, applies a {@code TransformProcess} locally with
 * {@code LocalTransformExecutor} and writes the transformed records to a second CSV file.
 *
 * <p>The transform removes the "Name" and "Fare" columns, converts "Sex" to an integer,
 * one-hot encodes "Pclass" and then removes the "Pclass[1]" column.
 *
 * <p>Both file paths are placeholders and must be replaced before running. An
 * {@link IllegalArgumentException} is caught and answered with a hint on stdout.
 *
 * @param args unused
 * @throws Exception if reading, transforming or writing fails for any other reason
 */
public static void main(String[] args) throws Exception {
    // try-with-resources closes the reader and the writer on every path, including failures;
    // previously the reader was never closed and the writer only on success.
    try (RecordReader recordReader = new CSVRecordReader(1, ',');
         RecordWriter recordWriter = new CSVRecordWriter()) {

        File file = new File("Path/to/titanic.csv-file");
        recordReader.initialize(new FileSplit(file));

        Schema schema = new Schema.Builder()
                .addColumnInteger("Survived")
                .addColumnCategorical("Pclass", Arrays.asList("1","2","3"))
                .addColumnString("Name")
                .addColumnCategorical("Sex", Arrays.asList("male","female"))
                .addColumnsInteger("Age","Siblings/Spouses Aboard","Parents/Children Aboard")
                .addColumnDouble("Fare")
                .build();
        TransformProcess transformProcess = new TransformProcess.Builder(schema)
                .removeColumns("Name","Fare")
                .categoricalToInteger("Sex")
                .categoricalToOneHot("Pclass")
                .removeColumns("Pclass[1]")
                .build();

        Partitioner partitioner = new NumberOfRecordsPartitioner();
        recordWriter.initialize(new FileSplit(new File("/Path/To/LocalExecuteExample.csv/file")), partitioner);

        // Load all records into memory; the local executor works on an in-memory list.
        List<List<Writable>> inputData = new ArrayList<>();
        while (recordReader.hasNext()) {
            inputData.add(recordReader.next());
        }

        List<List<Writable>> transformedOutput = LocalTransformExecutor.execute(inputData, transformProcess);
        recordWriter.writeBatch(transformedOutput);
    } catch (IllegalArgumentException e) {
        System.out.println("Please provide proper file paths for titanic.csv & fle in place of: Path/to/titanic.csv-file && /Path/To/LocalExecuteExample.csv");
        System.out.println("You need to create an empty CSV file and mention the file path in place of /Path/To/LocalExecuteExample.csv");
    }
}
 
Example #20
Source File: ExecutionTest.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Reduces four records grouped by "intCol" (take-first on the text column, mean on the double
 * column) and asserts that, sorted by key, the result is (0, "first", 4.0) and (1, "f", 40.0).
 */
@Test
public void testReductionByKey() {
    Schema schema = new Schema.Builder()
            .addColumnInteger("intCol")
            .addColumnString("textCol")
            .addColumnDouble("doubleCol")
            .build();

    TransformProcess process = new TransformProcess.Builder(schema)
            .reduce(new Reducer.Builder(ReduceOp.TakeFirst)
                    .keyColumns("intCol")
                    .takeFirstColumns("textCol")
                    .meanColumns("doubleCol")
                    .build())
            .build();

    // Two records per key value.
    List<List<Writable>> input = Arrays.asList(
            Arrays.<Writable>asList(new IntWritable(0), new Text("first"), new DoubleWritable(3.0)),
            Arrays.<Writable>asList(new IntWritable(0), new Text("second"), new DoubleWritable(5.0)),
            Arrays.<Writable>asList(new IntWritable(1), new Text("f"), new DoubleWritable(30.0)),
            Arrays.<Writable>asList(new IntWritable(1), new Text("s"), new DoubleWritable(50.0)));

    // Copy the result so it can be sorted by key before comparing.
    List<List<Writable>> actual = new ArrayList<>(LocalTransformExecutor.execute(input, process));
    Comparator<List<Writable>> byKey = new Comparator<List<Writable>>() {
        @Override
        public int compare(List<Writable> left, List<Writable> right) {
            return Integer.compare(left.get(0).toInt(), right.get(0).toInt());
        }
    };
    Collections.sort(actual, byKey);

    List<List<Writable>> expected = Arrays.asList(
            Arrays.<Writable>asList(new IntWritable(0), new Text("first"), new DoubleWritable(4.0)),
            Arrays.<Writable>asList(new IntWritable(1), new Text("f"), new DoubleWritable(40.0)));

    assertEquals(expected, actual);
}
 
Example #21
Source File: ExecutionTest.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Reduces four records grouped by "intCol" (take-first on the text column, mean on the double
 * column) and asserts that, sorted by key, the result is (0, "first", 4.0) and (1, "f", 40.0).
 */
@Test
public void testReductionByKey() {
    // Two records per key value.
    List<List<Writable>> input = Arrays.asList(
            Arrays.<Writable>asList(new IntWritable(0), new Text("first"), new DoubleWritable(3.0)),
            Arrays.<Writable>asList(new IntWritable(0), new Text("second"), new DoubleWritable(5.0)),
            Arrays.<Writable>asList(new IntWritable(1), new Text("f"), new DoubleWritable(30.0)),
            Arrays.<Writable>asList(new IntWritable(1), new Text("s"), new DoubleWritable(50.0)));

    Schema schema = new Schema.Builder()
            .addColumnInteger("intCol")
            .addColumnString("textCol")
            .addColumnDouble("doubleCol")
            .build();

    Reducer.Builder reducer = new Reducer.Builder(ReduceOp.TakeFirst)
            .keyColumns("intCol")
            .takeFirstColumns("textCol")
            .meanColumns("doubleCol");
    TransformProcess process = new TransformProcess.Builder(schema)
            .reduce(reducer.build())
            .build();

    List<List<Writable>> expected = Arrays.asList(
            Arrays.<Writable>asList(new IntWritable(0), new Text("first"), new DoubleWritable(4.0)),
            Arrays.<Writable>asList(new IntWritable(1), new Text("f"), new DoubleWritable(40.0)));

    // Copy the result so it can be sorted by key before comparing.
    List<List<Writable>> actual = new ArrayList<>(LocalTransformExecutor.execute(input, process));
    Collections.sort(actual, new Comparator<List<Writable>>() {
        @Override
        public int compare(List<Writable> left, List<Writable> right) {
            return Integer.compare(left.get(0).toInt(), right.get(0).toInt());
        }
    });

    assertEquals(expected, actual);
}
 
Example #22
Source File: ExecutionTest.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Executes a sequence-to-sequence process (categorical-to-integer on "col1", add 10.0 to
 * "col2") over two sequences of lengths 3 and 2, sorts the output longest-first and compares
 * it with the expected sequences.
 */
@Test
public void testExecutionSequence() {
    Schema schema = new SequenceSchema.Builder()
            .addColumnInteger("col0")
            .addColumnCategorical("col1", "state0", "state1", "state2")
            .addColumnDouble("col2")
            .build();

    TransformProcess process = new TransformProcess.Builder(schema)
            .categoricalToInteger("col1")
            .doubleMathOp("col2", MathOp.Add, 10.0)
            .build();

    List<List<Writable>> longInput = new ArrayList<>();
    longInput.add(Arrays.<Writable>asList(new IntWritable(0), new Text("state2"), new DoubleWritable(0.1)));
    longInput.add(Arrays.<Writable>asList(new IntWritable(1), new Text("state1"), new DoubleWritable(1.1)));
    longInput.add(Arrays.<Writable>asList(new IntWritable(2), new Text("state0"), new DoubleWritable(2.1)));

    List<List<Writable>> shortInput = new ArrayList<>();
    shortInput.add(Arrays.<Writable>asList(new IntWritable(3), new Text("state0"), new DoubleWritable(3.1)));
    shortInput.add(Arrays.<Writable>asList(new IntWritable(4), new Text("state1"), new DoubleWritable(4.1)));

    List<List<List<Writable>>> sequences = new ArrayList<>();
    sequences.add(longInput);
    sequences.add(shortInput);

    List<List<List<Writable>>> actual = LocalTransformExecutor.executeSequenceToSequence(sequences, process);

    // Descending by sequence length, so the 3-step sequence comes first.
    Collections.sort(actual, new Comparator<List<List<Writable>>>() {
        @Override
        public int compare(List<List<Writable>> left, List<List<Writable>> right) {
            return Integer.compare(right.size(), left.size());
        }
    });

    List<List<Writable>> longExpected = new ArrayList<>();
    longExpected.add(Arrays.<Writable>asList(new IntWritable(0), new IntWritable(2), new DoubleWritable(10.1)));
    longExpected.add(Arrays.<Writable>asList(new IntWritable(1), new IntWritable(1), new DoubleWritable(11.1)));
    longExpected.add(Arrays.<Writable>asList(new IntWritable(2), new IntWritable(0), new DoubleWritable(12.1)));

    List<List<Writable>> shortExpected = new ArrayList<>();
    shortExpected.add(Arrays.<Writable>asList(new IntWritable(3), new IntWritable(0), new DoubleWritable(13.1)));
    shortExpected.add(Arrays.<Writable>asList(new IntWritable(4), new IntWritable(1), new DoubleWritable(14.1)));

    List<List<List<Writable>>> expected = new ArrayList<>();
    expected.add(longExpected);
    expected.add(shortExpected);

    assertEquals(expected, actual);
}
 
Example #23
Source File: TestConvertToSequence.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Converts five records to sequences keyed on ("key1", "key2") and ordered by a
 * {@code NumericalColumnComparator} on "time", then asserts that two sequences result: the
 * three "k1a"/"k2a" records with times -10, 0, 10 and the two "k1b"/"k2b" records with times
 * 5, 10.
 */
@Test
public void testConvertToSequenceCompoundKey() {
    Schema schema = new Schema.Builder()
            .addColumnsString("key1", "key2")
            .addColumnLong("time")
            .build();

    List<List<Writable>> records = Arrays.asList(
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(10)),
            Arrays.<Writable>asList(new Text("k1b"), new Text("k2b"), new LongWritable(10)),
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(-10)),
            Arrays.<Writable>asList(new Text("k1b"), new Text("k2b"), new LongWritable(5)),
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(0)));

    TransformProcess process = new TransformProcess.Builder(schema)
            .convertToSequence(Arrays.asList("key1", "key2"), new NumericalColumnComparator("time"))
            .build();

    List<List<List<Writable>>> sequences = LocalTransformExecutor.executeToSequence(records, process);
    assertEquals(2, sequences.size());

    // Output order is not fixed by the test: locate the 3-step sequence by its length.
    int longIdx = sequences.get(0).size() == 3 ? 0 : 1;
    List<List<Writable>> longSeq = sequences.get(longIdx);
    List<List<Writable>> shortSeq = sequences.get(1 - longIdx);

    List<List<Writable>> expectedLong = Arrays.asList(
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(-10)),
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(0)),
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(10)));

    List<List<Writable>> expectedShort = Arrays.asList(
            Arrays.<Writable>asList(new Text("k1b"), new Text("k2b"), new LongWritable(5)),
            Arrays.<Writable>asList(new Text("k1b"), new Text("k2b"), new LongWritable(10)));

    assertEquals(expectedLong, longSeq);
    assertEquals(expectedShort, shortSeq);
}
 
Example #24
Source File: TestConvertToSequence.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Converts five flat records into sequences keyed on the compound key ("key1", "key2"),
 * using a {@code NumericalColumnComparator} on the "time" column, and checks the two
 * resulting sequences against their expected contents.
 */
@Test
public void testConvertToSequenceCompoundKey() {

    Schema schema = new Schema.Builder()
                    .addColumnsString("key1", "key2")
                    .addColumnLong("time")
                    .build();

    // Three records carry key (k1a, k2a) and two carry (k1b, k2b); time values are not in sorted order
    List<List<Writable>> records = Arrays.asList(
                    Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(10)),
                    Arrays.<Writable>asList(new Text("k1b"), new Text("k2b"), new LongWritable(10)),
                    Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(-10)),
                    Arrays.<Writable>asList(new Text("k1b"), new Text("k2b"), new LongWritable(5)),
                    Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(0)));

    List<String> keyColumns = Arrays.asList("key1", "key2");
    TransformProcess process = new TransformProcess.Builder(schema)
                    .convertToSequence(keyColumns, new NumericalColumnComparator("time"))
                    .build();

    List<List<List<Writable>>> out = LocalTransformExecutor.executeToSequence(records, process);
    assertEquals(2, out.size());

    // Accept either output order: the sequence with three steps is treated as the (k1a, k2a) one
    boolean threeStepFirst = out.get(0).size() == 3;
    List<List<Writable>> actualK1a = threeStepFirst ? out.get(0) : out.get(1);
    List<List<Writable>> actualK1b = threeStepFirst ? out.get(1) : out.get(0);

    List<List<Writable>> expectedK1a = Arrays.asList(
                    Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(-10)),
                    Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(0)),
                    Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(10)));

    List<List<Writable>> expectedK1b = Arrays.asList(
                    Arrays.<Writable>asList(new Text("k1b"), new Text("k2b"), new LongWritable(5)),
                    Arrays.<Writable>asList(new Text("k1b"), new Text("k2b"), new LongWritable(10)));

    assertEquals(expectedK1a, actualK1a);
    assertEquals(expectedK1b, actualK1b);
}
 
Example #25
Source File: ExecutionTest.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a sequence-to-sequence TransformProcess (categorical "col1" to integer, add 10.0 to
 * "col2") over two input sequences and compares the sorted output with the expected sequences.
 */
@Test
public void testExecutionSequence() {

    Schema seqSchema = new SequenceSchema.Builder()
            .addColumnInteger("col0")
            .addColumnCategorical("col1", "state0", "state1", "state2")
            .addColumnDouble("col2")
            .build();

    TransformProcess process = new TransformProcess.Builder(seqSchema)
            .categoricalToInteger("col1")
            .doubleMathOp("col2", MathOp.Add, 10.0)
            .build();

    // Input: one sequence of three steps and one of two steps
    List<List<Writable>> longInput = new ArrayList<>();
    longInput.add(Arrays.<Writable>asList(new IntWritable(0), new Text("state2"), new DoubleWritable(0.1)));
    longInput.add(Arrays.<Writable>asList(new IntWritable(1), new Text("state1"), new DoubleWritable(1.1)));
    longInput.add(Arrays.<Writable>asList(new IntWritable(2), new Text("state0"), new DoubleWritable(2.1)));

    List<List<Writable>> shortInput = new ArrayList<>();
    shortInput.add(Arrays.<Writable>asList(new IntWritable(3), new Text("state0"), new DoubleWritable(3.1)));
    shortInput.add(Arrays.<Writable>asList(new IntWritable(4), new Text("state1"), new DoubleWritable(4.1)));

    List<List<List<Writable>>> inputSequences = new ArrayList<>();
    inputSequences.add(longInput);
    inputSequences.add(shortInput);

    List<List<List<Writable>>> out = LocalTransformExecutor.executeSequenceToSequence(inputSequences, process);

    // Order the output longest-sequence-first before comparing with the expected list
    Collections.sort(out, new Comparator<List<List<Writable>>>() {
        @Override
        public int compare(List<List<Writable>> left, List<List<Writable>> right) {
            return Integer.compare(right.size(), left.size());
        }
    });

    List<List<Writable>> longExpected = Arrays.asList(
            Arrays.<Writable>asList(new IntWritable(0), new IntWritable(2), new DoubleWritable(10.1)),
            Arrays.<Writable>asList(new IntWritable(1), new IntWritable(1), new DoubleWritable(11.1)),
            Arrays.<Writable>asList(new IntWritable(2), new IntWritable(0), new DoubleWritable(12.1)));

    List<List<Writable>> shortExpected = Arrays.asList(
            Arrays.<Writable>asList(new IntWritable(3), new IntWritable(0), new DoubleWritable(13.1)),
            Arrays.<Writable>asList(new IntWritable(4), new IntWritable(1), new DoubleWritable(14.1)));

    List<List<List<Writable>>> expectedSequence = Arrays.asList(longExpected, shortExpected);

    assertEquals(expectedSequence, out);
}
 
Example #26
Source File: TestPythonTransformProcess.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a TransformProcess that applies a PythonTransform (adding string column "col6")
 * followed by a ConditionFilter backed by a PythonCondition, then executes it on three records.
 * The execution result is not inspected; the test only requires the run to complete without
 * an exception inside the 60 second timeout.
 */
@Test(timeout = 60000L)
public void testPythonFilterAndTransform() throws Exception{
    // One builder produces both schemas: the final schema is the initial one plus "col6"
    Builder schemaBuilder = new Builder();
    schemaBuilder.addColumnInteger("col1");
    schemaBuilder.addColumnFloat("col2");
    schemaBuilder.addColumnString("col3");
    schemaBuilder.addColumnDouble("col4");
    Schema initialSchema = schemaBuilder.build();

    schemaBuilder.addColumnString("col6");
    Schema finalSchema = schemaBuilder.build();

    Condition condition = new PythonCondition("f = lambda: col1 < 0 and col2 > 10.0");
    condition.setInputSchema(initialSchema);
    Filter filter = new ConditionFilter(condition);

    String pythonCode = "col6 = str(col1 + col2)";
    PythonTransform addCol6 = PythonTransform.builder()
            .code(pythonCode)
            .outputSchema(finalSchema)
            .build();

    TransformProcess tp = new TransformProcess.Builder(initialSchema)
            .transform(addCol6)
            .filter(filter)
            .build();

    // Three records that differ only in col1 and col2
    List<List<Writable>> inputs = new ArrayList<>();
    inputs.add(Arrays.<Writable>asList(
            new IntWritable(5), new FloatWritable(3.0f), new Text("abcd"), new DoubleWritable(2.1)));
    inputs.add(Arrays.<Writable>asList(
            new IntWritable(-3), new FloatWritable(3.0f), new Text("abcd"), new DoubleWritable(2.1)));
    inputs.add(Arrays.<Writable>asList(
            new IntWritable(5), new FloatWritable(11.2f), new Text("abcd"), new DoubleWritable(2.1)));

    LocalTransformExecutor.execute(inputs,tp);
}
 
Example #27
Source File: LocalExecuteExample.java    From Java-Deep-Learning-Cookbook with MIT License 4 votes vote down vote up
/**
 * Reads records from a CSV file, applies a schema-driven {@code TransformProcess} locally via
 * {@code LocalTransformExecutor}, and writes the transformed records to an output CSV file.
 *
 * <p>The reader and writer are managed by try-with-resources so both are closed even when
 * reading, transforming or writing fails.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if any step fails with something other than an
 *                   {@link IllegalArgumentException}, which is reported on standard output
 */
public static void main(String[] args) throws Exception {
    File file = new File("Path/to/titanic.csv-file");

    try (RecordReader recordReader = new CSVRecordReader(1,',');
         RecordWriter recordWriter = new CSVRecordWriter()) {

        recordReader.initialize(new FileSplit(file));

        Schema schema = new Schema.Builder()
                .addColumnInteger("Survived")
                .addColumnCategorical("Pclass", Arrays.asList("1","2","3"))
                .addColumnString("Name")
                .addColumnCategorical("Sex", Arrays.asList("male","female"))
                .addColumnsInteger("Age","Siblings/Spouses Aboard","Parents/Children Aboard")
                .addColumnDouble("Fare")
                .build();
        TransformProcess transformProcess = new TransformProcess.Builder(schema)
                .removeColumns("Name","Fare")
                .categoricalToInteger("Sex")
                .categoricalToOneHot("Pclass")
                .removeColumns("Pclass[1]")
                .build();

        Partitioner partitioner = new NumberOfRecordsPartitioner();
        recordWriter.initialize(new FileSplit(new File("/Path/To/LocalExecuteExample.csv/file")),partitioner);

        // The local executor works on an in-memory list, so load every record first
        List<List<Writable>> outputData = new ArrayList<>();
        while(recordReader.hasNext()){
            outputData.add(recordReader.next());
        }

        List<List<Writable>> transformedOutput=LocalTransformExecutor.execute(outputData,transformProcess);
        recordWriter.writeBatch(transformedOutput);
    } catch (IllegalArgumentException e) {
        System.out.println("Please provide proper file paths for titanic.csv & fle in place of: Path/to/titanic.csv-file && /Path/To/LocalExecuteExample.csv");
        System.out.println("You need to create an empty CSV file and mention the file path in place of /Path/To/LocalExecuteExample.csv");
    }
}
 
Example #28
Source File: TestMultiNLPTransform.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a {@code MultiNlpTransform} that combines two {@code GazeteerTransform}s over two
 * tokenized sentences and checks the resulting arrays, then verifies that a TransformProcess
 * restored from JSON equals the original and yields the same arrays when executed.
 */
@Test
public void test(){

    List<String> words = Arrays.asList("apple", "banana", "cherry", "date", "eggplant");
    GazeteerTransform t1 = new GazeteerTransform("words", "out", words);
    GazeteerTransform t2 = new GazeteerTransform("out", "out", words);


    MultiNlpTransform multi = new MultiNlpTransform("text", new BagOfWordsTransform[]{t1, t2}, "out");

    String[] corpus = {
            "hello I like apple".toLowerCase(),
            "date eggplant potato".toLowerCase()
    };

    // Each sentence becomes one sequence with a single Text token per time step
    List<List<List<Writable>>> input = new ArrayList<>();
    for(String s : corpus){
        String[] split = s.split(" ");
        List<List<Writable>> seq = new ArrayList<>();
        for(String s2 : split){
            seq.add(Collections.<Writable>singletonList(new Text(s2)));
        }
        input.add(seq);
    }

    SequenceSchema schema = (SequenceSchema) new SequenceSchema.Builder()
            .addColumnString("text").build();

    TransformProcess tp = new TransformProcess.Builder(schema)
            .transform(multi)
            .build();

    List<List<List<Writable>>> execute = LocalTransformExecutor.executeSequenceToSequence(input, tp);

    INDArray arr0 = ((NDArrayWritable)execute.get(0).get(0).get(0)).get();
    INDArray arr1 = ((NDArrayWritable)execute.get(0).get(1).get(0)).get();

    INDArray exp0 = Nd4j.create(new float[]{1, 0, 0, 0, 0, 1, 0, 0, 0, 0});
    INDArray exp1 = Nd4j.create(new float[]{0, 0, 0, 1, 1, 0, 0, 0, 1, 1});

    assertEquals(exp0, arr0);
    assertEquals(exp1, arr1);


    String json = tp.toJson();
    TransformProcess tp2 = TransformProcess.fromJson(json);
    assertEquals(tp, tp2);

    // Execute the deserialized process (tp2), so the JSON round trip is checked end to end
    List<List<List<Writable>>> execute2 = LocalTransformExecutor.executeSequenceToSequence(input, tp2);
    INDArray arr0a = ((NDArrayWritable)execute2.get(0).get(0).get(0)).get();
    INDArray arr1a = ((NDArrayWritable)execute2.get(0).get(1).get(0)).get();

    assertEquals(exp0, arr0a);
    assertEquals(exp1, arr1a);

}
 
Example #29
Source File: TestGazeteerTransform.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a {@code GazeteerTransform} over two tokenized sentences and checks the resulting
 * arrays against a five-word gazeteer, then verifies that a TransformProcess restored from
 * JSON equals the original and yields the same arrays when executed.
 */
@Test
public void testGazeteerTransform(){

    String[] corpus = {
            "hello I like apple".toLowerCase(),
            "cherry date eggplant potato".toLowerCase()
    };

    //Gazeteer transform: basically 0/1 if word is present. Assumes already tokenized input
    List<String> words = Arrays.asList("apple", "banana", "cherry", "date", "eggplant");

    GazeteerTransform t = new GazeteerTransform("words", "out", words);

    SequenceSchema schema = (SequenceSchema) new SequenceSchema.Builder()
            .addColumnString("words").build();

    TransformProcess tp = new TransformProcess.Builder(schema)
            .transform(t)
            .build();

    // Each sentence becomes one sequence with a single Text token per time step
    List<List<List<Writable>>> input = new ArrayList<>();
    for(String s : corpus){
        String[] split = s.split(" ");
        List<List<Writable>> seq = new ArrayList<>();
        for(String s2 : split){
            seq.add(Collections.<Writable>singletonList(new Text(s2)));
        }
        input.add(seq);
    }

    List<List<List<Writable>>> execute = LocalTransformExecutor.executeSequenceToSequence(input, tp);

    INDArray arr0 = ((NDArrayWritable)execute.get(0).get(0).get(0)).get();
    INDArray arr1 = ((NDArrayWritable)execute.get(0).get(1).get(0)).get();

    INDArray exp0 = Nd4j.create(new float[]{1, 0, 0, 0, 0});
    INDArray exp1 = Nd4j.create(new float[]{0, 0, 1, 1, 1});

    assertEquals(exp0, arr0);
    assertEquals(exp1, arr1);


    String json = tp.toJson();
    TransformProcess tp2 = TransformProcess.fromJson(json);
    assertEquals(tp, tp2);

    // Execute the deserialized process (tp2), so the JSON round trip is checked end to end
    List<List<List<Writable>>> execute2 = LocalTransformExecutor.executeSequenceToSequence(input, tp2);
    INDArray arr0a = ((NDArrayWritable)execute2.get(0).get(0).get(0)).get();
    INDArray arr1a = ((NDArrayWritable)execute2.get(0).get(1).get(0)).get();

    assertEquals(exp0, arr0a);
    assertEquals(exp1, arr1a);
}