org.datavec.api.transform.TransformProcess Java Examples

The following examples show how to use org.datavec.api.transform.TransformProcess. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CSVSparkTransformTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Smoke test: transforms one CSV record, collects three copies of the result in a
 * {@code BatchCSVRecord} and pushes that batch through {@code CSVSparkTransform.transform}.
 * No output values are asserted; the test passes as long as nothing throws.
 */
@Test
public void testTransformerBatch() throws Exception {
    Schema schema = new Schema.Builder().addColumnDouble("1.0").addColumnDouble("2.0").build();
    TransformProcess transformProcess =
            new TransformProcess.Builder(schema).convertToString("1.0").convertToString("2.0").build();
    CSVSparkTransform csvSparkTransform = new CSVSparkTransform(transformProcess);

    String[] values = new String[] {"1.0", "2.0"};
    SingleCSVRecord record = csvSparkTransform.transform(new SingleCSVRecord(values));

    BatchCSVRecord batchCSVRecord = new BatchCSVRecord();
    for (int i = 0; i < 3; i++) {
        batchCSVRecord.add(record);
    }

    // Per the original author's note the data type is string and cannot be converted,
    // so the batch is only run through transform(); it is not turned into an array.
    csvSparkTransform.transform(batchCSVRecord);
}
 
Example #2
Source File: TestPythonTransformProcess.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a {@code PythonTransform} over a single 1x1 NDArray column {@code a} and checks
 * that the first output row contains the incremented array followed by the string
 * assigned to {@code b} in the Python snippet.
 */
@Test
public void testNumpyTransform() {
    PythonTransform addTwoAndGreet = PythonTransform.builder()
            .code("a += 2; b = 'hello world'")
            .returnAllInputs(true)
            .build();

    List<List<Writable>> rows = new ArrayList<>();
    rows.add(Arrays.<Writable>asList(new NDArrayWritable(Nd4j.scalar(1).reshape(1,1))));

    Schema singleArrayColumn = new Builder()
            .addColumnNDArray("a",new long[]{1,1})
            .build();
    TransformProcess process = new TransformProcess.Builder(singleArrayColumn)
            .transform(addTwoAndGreet)
            .build();

    List<List<Writable>> result = LocalTransformExecutor.execute(rows, process);
    assertFalse(result.isEmpty());

    List<Writable> firstRow = result.get(0);
    assertNotNull(firstRow);
    assertNotNull(firstRow.get(0));
    assertNotNull(firstRow.get(1));

    // Column 0 is the input array after "a += 2"; column 1 is the new string "b".
    assertEquals(Nd4j.scalar(3).reshape(1, 1), ((NDArrayWritable) firstRow.get(0)).get());
    assertEquals("hello world", firstRow.get(1).toString());
}
 
Example #3
Source File: LocalTransformProcessRecordReaderTests.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps a three-step in-memory sequence in a
 * {@code LocalTransformProcessSequenceRecordReader} whose transform drops
 * {@code intcolumn2}, and checks that the first step read back has two values left.
 */
@Test
public void simpleTransformTestSequence() {
    final long firstTimestamp = 1451606400000L;

    //First window: timestamps 100 ms apart, intcolumn counting up, intcolumn2 always 0
    List<List<Writable>> steps = new ArrayList<>();
    for (int i = 0; i < 3; i++) {
        steps.add(Arrays.<Writable>asList(new LongWritable(firstTimestamp + 100L * i), new IntWritable(i),
                new IntWritable(0)));
    }

    Schema sequenceSchema = new SequenceSchema.Builder()
            .addColumnTime("timecolumn", DateTimeZone.UTC)
            .addColumnInteger("intcolumn")
            .addColumnInteger("intcolumn2")
            .build();
    TransformProcess dropSecondInt = new TransformProcess.Builder(sequenceSchema)
            .removeColumns("intcolumn2")
            .build();

    InMemorySequenceRecordReader source = new InMemorySequenceRecordReader(Arrays.asList(steps));
    LocalTransformProcessSequenceRecordReader transformingReader =
            new LocalTransformProcessSequenceRecordReader(source, dropSecondInt);

    List<List<Writable>> transformed = transformingReader.sequenceRecord();
    assertEquals(2, transformed.get(0).size());
}
 
Example #4
Source File: TestPythonTransformProcess.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a {@code PythonTransform} built without an explicit output schema over a single
 * integer column {@code a}, then checks the incremented integer and the new string
 * value in the first output row.
 */
@Test
public void testPythonTransformNoOutputSpecified() throws Exception {
    PythonTransform addTwoAndGreet = PythonTransform.builder()
            .code("a += 2; b = 'hello world'")
            .returnAllInputs(true)
            .build();

    List<List<Writable>> rows = new ArrayList<>();
    rows.add(Arrays.<Writable>asList(new IntWritable(1)));

    Schema singleIntColumn = new Builder()
            .addColumnInteger("a")
            .build();
    TransformProcess process = new TransformProcess.Builder(singleIntColumn)
            .transform(addTwoAndGreet)
            .build();

    List<Writable> firstRow = LocalTransformExecutor.execute(rows, process).get(0);
    assertEquals(3, firstRow.get(0).toInt());
    assertEquals("hello world", firstRow.get(1).toString());
}
 
Example #5
Source File: HyperParameterTuningArbiterUiExample.java    From Java-Deep-Learning-Cookbook with MIT License 6 votes vote down vote up
/**
 * Builds a record reader over {@code Churn_Modelling.csv} (loaded from the classpath)
 * that applies the churn-data transform to every record it returns.
 * <p>
 * The transform removes the {@code RowNumber}, {@code Surname} and {@code CustomerId}
 * columns, converts {@code Gender} to an integer, one-hot encodes {@code Geography}
 * and then removes the {@code Geography[France]} column.
 *
 * @return a {@code TransformProcessRecordReader} wrapping the initialised CSV reader
 * @throws IOException          declared by the original method; raised by the file/reader calls
 * @throws InterruptedException declared by the original method; raised by reader initialisation
 */
public RecordReader dataPreprocess() throws IOException, InterruptedException {
    // Column layout of the raw CSV file
    Schema.Builder rawColumns = new Schema.Builder()
            .addColumnsString("RowNumber")
            .addColumnInteger("CustomerId")
            .addColumnString("Surname")
            .addColumnInteger("CreditScore")
            .addColumnCategorical("Geography", Arrays.asList("France","Spain","Germany"))
            .addColumnCategorical("Gender",Arrays.asList("Male","Female"))
            .addColumnsInteger("Age","Tenure","Balance","NumOfProducts","HasCrCard","IsActiveMember","EstimatedSalary","Exited");
    Schema schema = rawColumns.build();

    // Steps applied to each record
    TransformProcess.Builder steps = new TransformProcess.Builder(schema)
            .removeColumns("RowNumber","Surname","CustomerId")
            .categoricalToInteger("Gender")
            .categoricalToOneHot("Geography")
            .removeColumns("Geography[France]");
    TransformProcess transformProcess = steps.build();

    // CSV reader constructed with (1, ','): presumably "skip one header line, comma-delimited" - confirm
    RecordReader csvReader = new CSVRecordReader(1,',');
    csvReader.initialize(new FileSplit(new ClassPathResource("Churn_Modelling.csv").getFile()));
    return new TransformProcessRecordReader(csvReader,transformProcess);
}
 
Example #6
Source File: CSVSparkTransformTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Transforms a single two-column CSV record, converts the same raw record to a
 * Base64-encoded array via {@code CSVSparkTransform.toArray}, and checks that the
 * decoded array is a vector.
 */
@Test
public void testTransformer() throws Exception {
    Schema schema = new Schema.Builder().addColumnDouble("1.0").addColumnDouble("2.0").build();
    TransformProcess transformProcess =
            new TransformProcess.Builder(schema).convertToString("1.0").convertToString("2.0").build();
    CSVSparkTransform csvSparkTransform = new CSVSparkTransform(transformProcess);
    String[] values = new String[] {"1.0", "2.0"};

    // Exercised only to confirm the single-record transform does not throw; result is unused.
    csvSparkTransform.transform(new SingleCSVRecord(values));

    Base64NDArrayBody body = csvSparkTransform.toArray(new SingleCSVRecord(values));
    INDArray fromBase64 = Nd4jBase64.fromBase64(body.getNdarray());
    assertTrue(fromBase64.isVector());
    System.out.println("Base 64ed array " + fromBase64);
}
 
Example #7
Source File: JsonArrayMapConverter.java    From konduit-serving with Apache License 2.0 6 votes vote down vote up
/**
 * {@inheritDoc}
 * <p>
 * When {@code transformProcess} is non-null the conversion is delegated to
 * {@code doTransformProcessConvertPmml}. Otherwise every JSON object in
 * {@code jsonArray} becomes one insertion-ordered map with one entry per schema column:
 * the key is the field name at the column's index, the value is whatever the JSON
 * object holds under the schema's column name.
 */
@Override
public List<? extends Map<FieldName, ?>> convertPmml(Schema schema, JsonArray jsonArray, TransformProcess transformProcess) {
    if (transformProcess != null) {
        return doTransformProcessConvertPmml(schema, jsonArray, transformProcess);
    }

    List<FieldName> fieldNames = getNameRepresentationFor(schema);
    int numColumns = schema.numColumns();

    List<Map<FieldName, Object>> ret = new ArrayList<>(jsonArray.size());

    for (int i = 0; i < jsonArray.size(); i++) {
        JsonObject jsonObject = jsonArray.getJsonObject(i);
        // Diamond instead of the raw LinkedHashMap avoids an unchecked assignment.
        Map<FieldName, Object> record = new LinkedHashMap<>();
        for (int j = 0; j < numColumns; j++) {
            record.put(fieldNames.get(j), jsonObject.getValue(schema.getName(j)));
        }

        ret.add(record);
    }

    return ret;
}
 
Example #8
Source File: SerializationExample.java    From Java-Deep-Learning-Cookbook with MIT License 6 votes vote down vote up
/**
 * Builds a small schema and transform process, then prints the transform process
 * serialised first as JSON and then as YAML.
 *
 * @param args unused
 */
public static void main(String[] args) {
    List<String> grades = Arrays.asList("A","B","C","D");
    Schema schema = new Schema.Builder()
            .addColumnsString("Name", "Subject")
            .addColumnInteger("Score")
            .addColumnCategorical("Grade", grades)
            .addColumnInteger("Passed")
            .build();

    TransformProcess.Builder steps = new TransformProcess.Builder(schema)
            .removeColumns("Name")
            .transform(new ConvertToDouble("Score"))
            .categoricalToInteger("Grade");
    TransformProcess transformProcess = steps.build();

    // JSON first, then YAML - same order as the printed output
    System.out.println(transformProcess.toJson());
    System.out.println(transformProcess.toYaml());
}
 
Example #9
Source File: CreateInferenceTransformDescription.java    From SKIL_Examples with Apache License 2.0 6 votes vote down vote up
/**
 * Parses the command-line arguments, loads a {@code DataAnalysis} from the JSON file
 * referenced by {@code dataAnalysis}, builds the Iris inference transform from it and
 * writes that transform as JSON to {@code iris-inference-transform.json} under
 * {@code outputPath}.
 * <p>
 * If argument parsing fails, the exception and the usage text are printed and the JVM
 * exits with status 1.
 *
 * @param args command-line arguments handed to JCommander
 * @throws Exception propagated from reading, parsing or writing
 */
private void entryPoint(String... args) throws Exception {
    JCommander commander = new JCommander(this);
    try {
        commander.parse(args);
    } catch (ParameterException parseFailure) {
        System.out.println(parseFailure);
        commander.usage();
        System.exit(1);
    }

    String analysisJson = FileUtils.readFileToString(dataAnalysis);
    DataAnalysis analysis = DataAnalysis.fromJson(analysisJson);
    TransformProcess tp = IrisData.inferenceTransform(analysis);

    File target = new File(outputPath + File.separator + "iris-inference-transform.json");
    FileUtils.writeStringToFile(target, tp.toJson());
}
 
Example #10
Source File: TestYamlJsonSerde.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Serialises a transform process containing integer and NDArray operations to JSON and
 * to YAML, reads both back, and checks each restored instance equals the original.
 */
@Test
public void testTransformProcessAndSchema() {
    Schema schema = new Schema.Builder()
                    .addColumnInteger("firstCol")
                    .addColumnNDArray("nd1a", new long[] {1, 10})
                    .addColumnNDArray("nd1b", new long[] {1, 10})
                    .addColumnNDArray("nd2", new long[] {1, 100})
                    .addColumnNDArray("nd3", new long[] {-1, -1})
                    .build();

    TransformProcess original = new TransformProcess.Builder(schema)
                    .integerMathOp("firstCol", MathOp.Add, 1)
                    .ndArrayColumnsMathOpTransform("added", MathOp.Add, "nd1a", "nd1b")
                    .ndArrayMathFunctionTransform("nd2", MathFunction.SQRT)
                    .ndArrayScalarOpTransform("nd3", MathOp.Multiply, 2.0)
                    .build();

    // Serialise both formats first, then deserialise both, then compare.
    String json = original.toJson();
    String yaml = original.toYaml();

    TransformProcess restoredFromJson = TransformProcess.fromJson(json);
    TransformProcess restoredFromYaml = TransformProcess.fromYaml(yaml);

    assertEquals(original, restoredFromJson);
    assertEquals(original, restoredFromYaml);
}
 
Example #11
Source File: CSVSparkTransformTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Transforms a single two-column CSV record, converts the same raw record to a
 * Base64-encoded array via {@code CSVSparkTransform.toArray}, and checks that the
 * decoded array is a vector.
 */
@Test
public void testTransformer() throws Exception {
    Schema schema = new Schema.Builder().addColumnDouble("1.0").addColumnDouble("2.0").build();
    TransformProcess transformProcess =
            new TransformProcess.Builder(schema).convertToString("1.0").convertToString("2.0").build();
    CSVSparkTransform csvSparkTransform = new CSVSparkTransform(transformProcess);
    String[] values = new String[] {"1.0", "2.0"};

    // Exercised only to confirm the single-record transform does not throw; result is unused.
    csvSparkTransform.transform(new SingleCSVRecord(values));

    Base64NDArrayBody body = csvSparkTransform.toArray(new SingleCSVRecord(values));
    INDArray fromBase64 = Nd4jBase64.fromBase64(body.getNdarray());
    assertTrue(fromBase64.isVector());
}
 
Example #12
Source File: TransformProcessRecordReaderTests.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Reads {@code iris.dat} through a {@code TransformProcessRecordReader} that removes
 * column {@code "0"}, checking that every record has four values and that 150 records
 * are read; then resets the reader and checks a single batch read of 150 equals the
 * records read one by one.
 */
@Test
public void simpleTransformTest() throws Exception {
    Schema fiveDoubleColumns = new Schema.Builder()
            .addColumnsDouble("%d", 0, 4)
            .build();
    TransformProcess dropFirstColumn = new TransformProcess.Builder(fiveDoubleColumns)
            .removeColumns("0")
            .build();

    CSVRecordReader csvReader = new CSVRecordReader();
    csvReader.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));
    TransformProcessRecordReader reader = new TransformProcessRecordReader(csvReader, dropFirstColumn);

    // Record-by-record pass
    List<List<Writable>> readOneByOne = new ArrayList<>();
    int recordsSeen = 0;
    while (reader.hasNext()) {
        List<Writable> record = reader.next();
        assertEquals(4, record.size());
        recordsSeen++;
        readOneByOne.add(record);
    }
    assertEquals(150, recordsSeen);

    //Test batch:
    assertTrue(reader.resetSupported());
    reader.reset();
    List<List<Writable>> readAsBatch = reader.next(150);
    assertEquals(readOneByOne, readAsBatch);
}
 
Example #13
Source File: SerializationExample.java    From Java-Deep-Learning-Cookbook with MIT License 6 votes vote down vote up
/**
 * Defines a schema and a transform process over it, then prints the transform process
 * in its JSON form followed by its YAML form.
 *
 * @param args unused
 */
public static void main(String[] args) {
    Schema.Builder columns = new Schema.Builder()
            .addColumnsString("Name", "Subject")
            .addColumnInteger("Score")
            .addColumnCategorical("Grade", Arrays.asList("A","B","C","D"))
            .addColumnInteger("Passed");
    Schema schema = columns.build();

    TransformProcess transformProcess = new TransformProcess.Builder(schema)
            .removeColumns("Name")
            .transform(new ConvertToDouble("Score"))
            .categoricalToInteger("Grade")
            .build();

    String asJson = transformProcess.toJson();
    System.out.println(asJson);

    String asYaml = transformProcess.toYaml();
    System.out.println(asYaml);
}
 
Example #14
Source File: ExecutionTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Applies SIN to the NDArray column {@code first} and COS to {@code second} through the
 * local executor and compares each output with {@code Transforms.sin} /
 * {@code Transforms.cos} applied to the corresponding input array.
 */
@Test
public void testExecutionNdarray() {
    Schema twoArrayColumns = new Schema.Builder()
            .addColumnNDArray("first",new long[]{1,32577})
            .addColumnNDArray("second",new long[]{1,32577})
            .build();

    TransformProcess sinThenCos = new TransformProcess.Builder(twoArrayColumns)
            .ndArrayMathFunctionTransform("first", MathFunction.SIN)
            .ndArrayMathFunctionTransform("second",MathFunction.COS)
            .build();

    INDArray sinInput = Nd4j.linspace(1,4,4);
    INDArray cosInput = Nd4j.linspace(1,4,4);

    List<Writable> row = new ArrayList<>();
    row.add(new NDArrayWritable(sinInput));
    row.add(new NDArrayWritable(cosInput));
    List<List<Writable>> rows = new ArrayList<>();
    rows.add(row);

    List<List<Writable>> result = LocalTransformExecutor.execute(rows, sinThenCos);
    INDArray sinActual = ((NDArrayWritable) result.get(0).get(0)).get();
    INDArray cosActual = ((NDArrayWritable) result.get(0).get(1)).get();

    // Expected values are computed after execution, from the same input arrays.
    INDArray sinExpected = Transforms.sin(sinInput);
    INDArray cosExpected = Transforms.cos(cosInput);
    assertEquals(sinExpected, sinActual);
    assertEquals(cosExpected, cosActual);
}
 
Example #15
Source File: TestPythonTransformProcess.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a {@code PythonTransform} built in setup-and-run mode: the Python source defines
 * {@code setup()} (assigning the global {@code five}) and {@code run(a, b)} (returning
 * {@code c = a + b + five}). With inputs 1 and 2 the test expects the value 8 at index 3
 * of the first output row.
 */
@Test
public void testWithSetupRun() throws Exception {
    String setupAndRunSource = "five=None\n" +
            "def setup():\n" +
            "    global five\n"+
            "    five = 5\n\n" +
            "def run(a, b):\n" +
            "    c = a + b + five\n"+
            "    return {'c':c}\n\n";

    PythonTransform pythonTransform = PythonTransform.builder()
            .code(setupAndRunSource)
            .returnAllInputs(true)
            .setupAndRun(true)
            .build();

    List<List<Writable>> rows = new ArrayList<>();
    rows.add(Arrays.<Writable>asList(
            new NDArrayWritable(Nd4j.scalar(1).reshape(1,1)),
            new NDArrayWritable(Nd4j.scalar(2).reshape(1,1))));

    Schema twoArrayColumns = new Builder()
            .addColumnNDArray("a",new long[]{1,1})
            .addColumnNDArray("b", new long[]{1, 1})
            .build();
    TransformProcess process = new TransformProcess.Builder(twoArrayColumns)
            .transform(pythonTransform)
            .build();

    List<List<Writable>> result = LocalTransformExecutor.execute(rows, process);
    assertFalse(result.isEmpty());

    List<Writable> firstRow = result.get(0);
    assertNotNull(firstRow);
    assertNotNull(firstRow.get(0));
    assertEquals(Nd4j.scalar(8).reshape(1, 1), ((NDArrayWritable) firstRow.get(3)).get());
}
 
Example #16
Source File: TestCalculateSortedRank.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a {@code rank} column computed from {@code DoubleCol} and runs the transform on
 * Spark. The text column of every input row holds the rank that row is expected to
 * receive, so after execution the test compares column 0 with column 2 row by row.
 */
@Test
public void testCalculateSortedRank() {
    List<List<Writable>> rows = new ArrayList<>();
    rows.add(Arrays.<Writable>asList(new Text("0"), new DoubleWritable(0.0)));
    rows.add(Arrays.<Writable>asList(new Text("3"), new DoubleWritable(0.3)));
    rows.add(Arrays.<Writable>asList(new Text("2"), new DoubleWritable(0.2)));
    rows.add(Arrays.<Writable>asList(new Text("1"), new DoubleWritable(0.1)));

    JavaRDD<List<Writable>> inputRdd = sc.parallelize(rows);

    Schema inputSchema = new Schema.Builder()
                    .addColumnsString("TextCol")
                    .addColumnDouble("DoubleCol")
                    .build();
    TransformProcess rankByDouble = new TransformProcess.Builder(inputSchema)
                    .calculateSortedRank("rank", "DoubleCol", new DoubleWritableComparator())
                    .build();

    // The final schema gains a third column named "rank" of type Long.
    Schema finalSchema = rankByDouble.getFinalSchema();
    assertEquals(3, finalSchema.numColumns());
    assertEquals(Arrays.asList("TextCol", "DoubleCol", "rank"), finalSchema.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.String, ColumnType.Double, ColumnType.Long), finalSchema.getColumnTypes());

    List<List<Writable>> ranked = SparkTransformExecutor.execute(inputRdd, rankByDouble).collect();
    assertEquals(4, ranked.size());
    for (List<Writable> row : ranked) {
        assertEquals(3, row.size());
    }

    for (int i = 0; i < ranked.size(); i++) {
        List<Writable> row = ranked.get(i);
        int expectedRank = row.get(0).toInt();
        int actualRank = row.get(2).toInt();
        assertEquals(expectedRank, actualRank);
    }
}
 
Example #17
Source File: IrisData.java    From SKIL_Examples with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the transform used at inference time: the label column is removed and each of
 * the four measurement columns is normalised with {@code Normalize.Standardize} using
 * the supplied analysis.
 *
 * @param analysis data analysis passed to every normalisation step
 * @return the assembled transform process
 */
public static TransformProcess inferenceTransform(DataAnalysis analysis) {
    TransformProcess.Builder steps = new TransformProcess.Builder(IrisData.SCHEMA);
    steps = steps.removeColumns(IrisData.COL5_LABEL);
    steps = steps.normalize(IrisData.COL1_SEPAL_LENGTH, Normalize.Standardize, analysis);
    steps = steps.normalize(IrisData.COL2_SEPAL_WIDTH, Normalize.Standardize, analysis);
    steps = steps.normalize(IrisData.COL3_PETAL_LENGTH, Normalize.Standardize, analysis);
    steps = steps.normalize(IrisData.COL4_PETAL_WIDTH, Normalize.Standardize, analysis);
    return steps.build();
}
 
Example #18
Source File: LocalTransformExecutor.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Execute the specified TransformProcess with the given <i>sequence</i> input data and
 * return sequence data.<br>
 * If the final schema of the TransformProcess is a {@code SequenceSchema}, the whole
 * input is passed to {@code execute} as sequence data and the second element of its
 * result is returned. Otherwise each input sequence is passed to {@code execute} on its
 * own as a list of (non-sequence) records, and the first element of each result is
 * collected, in input order.
 *
 * @param inputSequence    Input sequence data to process
 * @param transformProcess TransformProcess to execute
 * @return Processed sequence data, one entry per executed sequence
 */
public static List<List<List<Writable>>> executeSequenceToSequence(List<List<List<Writable>>> inputSequence,
                                                                   TransformProcess transformProcess) {
    if (transformProcess.getFinalSchema() instanceof SequenceSchema) {
        return execute(null, inputSequence, transformProcess).getSecond();
    }

    // Final schema is not a sequence schema: run every sequence through the record path.
    List<List<List<Writable>>> results = new ArrayList<>(inputSequence.size());
    for (List<List<Writable>> sequence : inputSequence) {
        results.add(execute(sequence, null, transformProcess).getFirst());
    }
    return results;
}
 
Example #19
Source File: LocalTransformExecutor.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Execute the specified TransformProcess with the given <i>sequence</i> input data and
 * return sequence data.<br>
 * When the TransformProcess ends in a {@code SequenceSchema}, the input is handed to
 * {@code execute} as sequence data and the second element of the result is returned.
 * When it does not, every input sequence is handed to {@code execute} separately as a
 * list of records and the first element of each result is gathered in input order.
 *
 * @param inputSequence    Input sequence data to process
 * @param transformProcess TransformProcess to execute
 * @return Processed sequence data, one entry per executed sequence
 */
public static List<List<List<Writable>>> executeSequenceToSequence(List<List<List<Writable>>> inputSequence,
                                                                   TransformProcess transformProcess) {
    boolean endsAsSequence = transformProcess.getFinalSchema() instanceof SequenceSchema;
    if (endsAsSequence) {
        return execute(null, inputSequence, transformProcess).getSecond();
    }

    List<List<List<Writable>>> perSequenceOutput = new ArrayList<>(inputSequence.size());
    for (List<List<Writable>> records : inputSequence) {
        perSequenceOutput.add(execute(records, null, transformProcess).getFirst());
    }
    return perSequenceOutput;
}
 
Example #20
Source File: TestPythonTransformProcess.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a {@code PythonTransform} that concatenates two string columns into a new third
 * column and checks that the two inputs are passed through unchanged and that the third
 * value is their concatenation.
 */
@Test()
public void testStringConcat() throws Exception{
    Builder schemaBuilder = new Builder();
    schemaBuilder
            .addColumnString("col1")
            .addColumnString("col2");

    // The same builder is reused: the initial schema is built with two columns, then a
    // third column is added before building the output schema.
    Schema initialSchema = schemaBuilder.build();
    schemaBuilder.addColumnString("col3");
    Schema finalSchema = schemaBuilder.build();

    String pythonCode = "col3 = col1 + col2";

    TransformProcess tp = new TransformProcess.Builder(initialSchema).transform(
            PythonTransform.builder().code(pythonCode)
                    .outputSchema(finalSchema)
                    .build()
    ).build();

    List<Writable> inputs = Arrays.asList((Writable)new Text("Hello "), new Text("World!"));

    List<Writable> outputs = tp.execute(inputs);
    // assertEquals takes (expected, actual); the original had them swapped, which would
    // have produced misleading failure messages.
    assertEquals("Hello ", outputs.get(0).toString());
    assertEquals("World!", outputs.get(1).toString());
    assertEquals("Hello World!", outputs.get(2).toString());
}
 
Example #21
Source File: TestCalculateSortedRank.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Local-executor variant of the sorted-rank test: adds a {@code rank} column computed
 * from {@code DoubleCol}. The text column of each input row holds the rank that row is
 * expected to receive, so column 0 is compared with column 2 for every output row.
 */
@Test
public void testCalculateSortedRank() {
    List<List<Writable>> rows = new ArrayList<>();
    rows.add(Arrays.<Writable>asList(new Text("0"), new DoubleWritable(0.0)));
    rows.add(Arrays.<Writable>asList(new Text("3"), new DoubleWritable(0.3)));
    rows.add(Arrays.<Writable>asList(new Text("2"), new DoubleWritable(0.2)));
    rows.add(Arrays.<Writable>asList(new Text("1"), new DoubleWritable(0.1)));

    Schema inputSchema = new Schema.Builder()
                    .addColumnsString("TextCol")
                    .addColumnDouble("DoubleCol")
                    .build();
    TransformProcess rankByDouble = new TransformProcess.Builder(inputSchema)
                    .calculateSortedRank("rank", "DoubleCol", new DoubleWritableComparator())
                    .build();

    // The final schema gains a third column named "rank" of type Long.
    Schema finalSchema = rankByDouble.getFinalSchema();
    assertEquals(3, finalSchema.numColumns());
    assertEquals(Arrays.asList("TextCol", "DoubleCol", "rank"), finalSchema.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.String, ColumnType.Double, ColumnType.Long), finalSchema.getColumnTypes());

    List<List<Writable>> ranked = LocalTransformExecutor.execute(rows, rankByDouble);
    assertEquals(4, ranked.size());
    for (List<Writable> row : ranked) {
        assertEquals(3, row.size());
    }

    for (int i = 0; i < ranked.size(); i++) {
        List<Writable> row = ranked.get(i);
        int expectedRank = row.get(0).toInt();
        int actualRank = row.get(2).toInt();
        assertEquals(expectedRank, actualRank);
    }
}
 
Example #22
Source File: ExecutionTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Executes a transform that converts the categorical column {@code col1} to an integer
 * and adds 10.0 to {@code col2}, sorts the output by the first column, and compares it
 * with the expected rows.
 */
@Test
public void testExecutionSimple() {
    Schema inputSchema = new Schema.Builder()
            .addColumnInteger("col0")
            .addColumnCategorical("col1", "state0", "state1", "state2")
            .addColumnDouble("col2")
            .build();

    TransformProcess process = new TransformProcess.Builder(inputSchema)
            .categoricalToInteger("col1")
            .doubleMathOp("col2", MathOp.Add, 10.0)
            .build();

    List<List<Writable>> rows = new ArrayList<>();
    rows.add(Arrays.<Writable>asList(new IntWritable(0), new Text("state2"), new DoubleWritable(0.1)));
    rows.add(Arrays.<Writable>asList(new IntWritable(1), new Text("state1"), new DoubleWritable(1.1)));
    rows.add(Arrays.<Writable>asList(new IntWritable(2), new Text("state0"), new DoubleWritable(2.1)));

    // Copy into a mutable list so the result can be sorted in place.
    List<List<Writable>> actual = new ArrayList<>(LocalTransformExecutor.execute(rows, process));

    Comparator<List<Writable>> byFirstColumn = new Comparator<List<Writable>>() {
        @Override
        public int compare(List<Writable> left, List<Writable> right) {
            return Integer.compare(left.get(0).toInt(), right.get(0).toInt());
        }
    };
    Collections.sort(actual, byFirstColumn);

    List<List<Writable>> expected = new ArrayList<>();
    expected.add(Arrays.<Writable>asList(new IntWritable(0), new IntWritable(2), new DoubleWritable(10.1)));
    expected.add(Arrays.<Writable>asList(new IntWritable(1), new IntWritable(1), new DoubleWritable(11.1)));
    expected.add(Arrays.<Writable>asList(new IntWritable(2), new IntWritable(0), new DoubleWritable(12.1)));

    assertEquals(expected, actual);
}
 
Example #23
Source File: ExecutionTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Executes a transform process consisting of a single filter on {@code col1}
 * ({@code LessThan 1}) over three rows and expects two rows in the output.
 */
@Test
public void testFilter() {
    Schema threeDoubleColumns = new Schema.Builder()
            .addColumnDouble("col1")
            .addColumnDouble("col2")
            .addColumnDouble("col3")
            .build();

    // NOTE(review): the schema declares col1 as double but the rows supply IntWritable
    // values for it - confirm this mismatch is intended.
    List<List<Writable>> rows = new ArrayList<>();
    rows.add(Arrays.<Writable>asList(new IntWritable(0), new DoubleWritable(1), new DoubleWritable(0.1)));
    rows.add(Arrays.<Writable>asList(new IntWritable(1), new DoubleWritable(3), new DoubleWritable(1.1)));
    rows.add(Arrays.<Writable>asList(new IntWritable(2), new DoubleWritable(3), new DoubleWritable(2.1)));

    TransformProcess filterOnly = new TransformProcess.Builder(threeDoubleColumns)
            .filter(new DoubleColumnCondition("col1",ConditionOp.LessThan,1))
            .build();

    List<List<Writable>> remaining = LocalTransformExecutor.execute(rows, filterOnly);
    assertEquals(2, remaining.size());
}
 
Example #24
Source File: ExecutionTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Executes a transform that converts the categorical column {@code col1} to an integer,
 * adds 10.0 to the double column {@code col2} and adds 5 to the float column
 * {@code col3}; the output is sorted by the first column and compared with the
 * expected rows.
 */
@Test
public void testExecutionSimple() {
    Schema inputSchema = new Schema.Builder()
            .addColumnInteger("col0")
            .addColumnCategorical("col1", "state0", "state1", "state2")
            .addColumnDouble("col2")
            .addColumnFloat("col3")
            .build();

    TransformProcess process = new TransformProcess.Builder(inputSchema)
            .categoricalToInteger("col1")
            .doubleMathOp("col2", MathOp.Add, 10.0)
            .floatMathOp("col3", MathOp.Add, 5f)
            .build();

    List<List<Writable>> rows = new ArrayList<>();
    rows.add(Arrays.<Writable>asList(new IntWritable(0), new Text("state2"), new DoubleWritable(0.1), new FloatWritable(0.3f)));
    rows.add(Arrays.<Writable>asList(new IntWritable(1), new Text("state1"), new DoubleWritable(1.1), new FloatWritable(1.7f)));
    rows.add(Arrays.<Writable>asList(new IntWritable(2), new Text("state0"), new DoubleWritable(2.1), new FloatWritable(3.6f)));

    // Copy into a mutable list so the result can be sorted in place.
    List<List<Writable>> actual = new ArrayList<>(LocalTransformExecutor.execute(rows, process));

    Comparator<List<Writable>> byFirstColumn = new Comparator<List<Writable>>() {
        @Override
        public int compare(List<Writable> left, List<Writable> right) {
            return Integer.compare(left.get(0).toInt(), right.get(0).toInt());
        }
    };
    Collections.sort(actual, byFirstColumn);

    List<List<Writable>> expected = new ArrayList<>();
    expected.add(Arrays.<Writable>asList(new IntWritable(0), new IntWritable(2), new DoubleWritable(10.1), new FloatWritable(5.3f)));
    expected.add(Arrays.<Writable>asList(new IntWritable(1), new IntWritable(1), new DoubleWritable(11.1), new FloatWritable(6.7f)));
    expected.add(Arrays.<Writable>asList(new IntWritable(2), new IntWritable(0), new DoubleWritable(12.1), new FloatWritable(8.6f)));

    assertEquals(expected, actual);
}
 
Example #25
Source File: DataVecTransformClient.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Serialises the given transform process to JSON and POSTs it to
 * {@code url + "/transformprocess"}.
 * <p>
 * A {@code UnirestException} raised by the request is logged and not rethrown.
 *
 * @param transformProcess the transform process to send
 */
@Override
public void setCSVTransformProcess(TransformProcess transformProcess) {
    try {
        String s = transformProcess.toJson();
        Unirest.post(url + "/transformprocess").header("accept", "application/json")
                .header("Content-Type", "application/json").body(s).asJson();

    } catch (UnirestException e) {
        // The logger already records the stack trace; printing it again to stderr
        // only duplicated the output.
        log.error("Error in setCSVTransformProcess()", e);
    }
}
 
Example #26
Source File: TestConvertToSequence.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a transform process consisting only of {@code convertToSequence()} on Spark and
 * checks that three sequences come back, each input record appearing as its own
 * single-element sequence.
 */
@Test
public void testConvertToSequenceLength1(){
    Schema schema = new Schema.Builder()
            .addColumnsString("string")
            .addColumnLong("long")
            .build();

    List<List<Writable>> examples = Arrays.asList(
            Arrays.<Writable>asList(new Text("a"), new LongWritable(0)),
            Arrays.<Writable>asList(new Text("b"), new LongWritable(1)),
            Arrays.<Writable>asList(new Text("c"), new LongWritable(2)));

    TransformProcess toSequence = new TransformProcess.Builder(schema)
            .convertToSequence()
            .build();

    JavaRDD<List<Writable>> inputRdd = sc.parallelize(examples);
    JavaRDD<List<List<Writable>>> sequenceRdd = SparkTransformExecutor.executeToSequence(inputRdd, toSequence);
    List<List<List<Writable>>> sequences = sequenceRdd.collect();

    assertEquals(3, sequences.size());

    // Output order is not asserted, only membership.
    for (List<Writable> example : examples) {
        assertTrue(sequences.contains(Collections.singletonList(example)));
    }
}
 
Example #27
Source File: CSVSparkTransformTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a batch of three transformed records, wraps it in a
 * {@code SequenceBatchCSVRecord} and converts it to a Base64-encoded array. The same
 * batch is then added a second time and the test checks that the decoded array has
 * shape {2, 2, 3} and that {@code transformSequence} returns non-null records.
 */
@Test
public void testSingleBatchSequence() throws Exception {
    Schema schema = new Schema.Builder().addColumnDouble("1.0").addColumnDouble("2.0").build();
    TransformProcess transformProcess =
            new TransformProcess.Builder(schema).convertToString("1.0").convertToString("2.0").build();
    CSVSparkTransform csvSparkTransform = new CSVSparkTransform(transformProcess);

    String[] values = new String[] {"1.0", "2.0"};
    SingleCSVRecord record = csvSparkTransform.transform(new SingleCSVRecord(values));
    BatchCSVRecord batchCSVRecord = new BatchCSVRecord();
    for (int i = 0; i < 3; i++) {
        batchCSVRecord.add(record);
    }

    // Batch transform is exercised for its own sake; the result is not used below.
    csvSparkTransform.transform(batchCSVRecord);

    SequenceBatchCSVRecord sequenceBatchCSVRecord = new SequenceBatchCSVRecord();
    sequenceBatchCSVRecord.add(Arrays.asList(batchCSVRecord));
    Base64NDArrayBody sequenceArray = csvSparkTransform.transformSequenceArray(sequenceBatchCSVRecord);
    // Decoding is kept so the test still fails if the first payload cannot be decoded.
    Nd4jBase64.fromBase64(sequenceArray.getNdarray());

    //ensure accumulation
    sequenceBatchCSVRecord.add(Arrays.asList(batchCSVRecord));
    sequenceArray = csvSparkTransform.transformSequenceArray(sequenceBatchCSVRecord);
    assertArrayEquals(new long[]{2,2,3},Nd4jBase64.fromBase64(sequenceArray.getNdarray()).shape());

    SequenceBatchCSVRecord transformed = csvSparkTransform.transformSequence(sequenceBatchCSVRecord);
    assertNotNull(transformed.getRecords());
    System.out.println(transformed);
}
 
Example #28
Source File: TestSerialization.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a minimal transform process: one double column {@code d} with the ABS math
 * function applied to it.
 *
 * @return the assembled transform process
 */
private static TransformProcess getTp(){
    Schema singleDoubleColumn = new Schema.Builder()
            .addColumnDouble("d")
            .build();
    return new TransformProcess.Builder(singleDoubleColumn)
            .doubleMathFunction("d", MathFunction.ABS)
            .build();
}
 
Example #29
Source File: TestPythonTransformProcess.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a {@code PythonTransform} that adds two random 3x2 NDArray columns into a new
 * third column and checks that the inputs pass through unchanged and that the third
 * value equals {@code arr1.add(arr2)} computed on the Java side.
 */
@Test(timeout = 60000L)
public void testNDArray() throws Exception{
    long[] shape = new long[]{3, 2};
    INDArray left = Nd4j.rand(shape);
    INDArray right = Nd4j.rand(shape);

    // Expected sum is computed up front, before the transform runs.
    INDArray expectedSum = left.add(right);

    Builder schemaBuilder = new Builder();
    schemaBuilder
            .addColumnNDArray("col1", shape)
            .addColumnNDArray("col2", shape);

    // The same builder is reused: input schema first, then a third column for the output schema.
    Schema inputSchema = schemaBuilder.build();
    schemaBuilder.addColumnNDArray("col3", shape);
    Schema outputSchema = schemaBuilder.build();

    String pythonCode = "col3 = col1 + col2";
    PythonTransform addColumns = PythonTransform.builder()
            .code(pythonCode)
            .outputSchema(outputSchema)
            .build();
    TransformProcess process = new TransformProcess.Builder(inputSchema)
            .transform(addColumns)
            .build();

    List<Writable> row = Arrays.<Writable>asList(
            new NDArrayWritable(left),
            new NDArrayWritable(right));

    List<Writable> result = process.execute(row);
    assertEquals(left, ((NDArrayWritable) result.get(0)).get());
    assertEquals(right, ((NDArrayWritable) result.get(1)).get());
    assertEquals(expectedSum, ((NDArrayWritable) result.get(2)).get());
}
 
Example #30
Source File: DataVecTransformClient.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Sends the given transform process, serialised as JSON, in a POST request to
 * {@code url + "/transformprocess"}.
 * <p>
 * A {@code UnirestException} raised by the request is logged and not rethrown.
 *
 * @param transformProcess the transform process to send
 */
@Override
public void setCSVTransformProcess(TransformProcess transformProcess) {
    try {
        String transformJson = transformProcess.toJson();
        Unirest.post(url + "/transformprocess")
                .header("accept", "application/json")
                .header("Content-Type", "application/json")
                .body(transformJson)
                .asJson();
    } catch (UnirestException requestFailure) {
        log.error("Error in setCSVTransformProcess()", requestFailure);
    }
}