org.datavec.api.transform.schema.Schema Java Examples

The following examples show how to use org.datavec.api.transform.schema.Schema. You can vote up the examples you find useful or vote down those you don't, and follow the links above each example to view the original project or source file. You may also check out the related API usage in the sidebar.
Example #1
Source File: TimeWindowFunction.java    From DataVec with Apache License 2.0 6 votes vote down vote up
@Override
public void setInputSchema(Schema schema) {
    // Time-windowing is only defined for sequence (time series) data.
    if (!(schema instanceof SequenceSchema))
        throw new IllegalArgumentException(
                        "Invalid schema: TimeWindowFunction can " + "only operate on SequenceSchema");
    if (!schema.hasColumn(timeColumn))
        throw new IllegalStateException("Input schema does not have a column with name \"" + timeColumn + "\"");

    // The configured time column must actually be of Time type.
    if (schema.getMetaData(timeColumn).getColumnType() != ColumnType.Time)
        throw new IllegalStateException("Invalid column: column \"" + timeColumn + "\" is not of type "
                        + ColumnType.Time + "; is " + schema.getMetaData(timeColumn).getColumnType());

    this.inputSchema = schema;

    // Cache the column's time zone; presumably used for window boundary
    // calculations elsewhere in this class — confirm against the op methods.
    timeZone = ((TimeMetaData) schema.getMetaData(timeColumn)).getTimeZone();
}
 
Example #2
Source File: TestPythonTransformProcess.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Test
public void testNumpyTransform() {
    // Increment NDArray input "a" by 2; the python code also introduces "b".
    PythonTransform transform = PythonTransform.builder()
            .code("a += 2; b = 'hello world'")
            .returnAllInputs(true)
            .build();

    Schema schema = new Builder()
            .addColumnNDArray("a", new long[]{1, 1})
            .build();

    TransformProcess process = new TransformProcess.Builder(schema)
            .transform(transform)
            .build();

    List<List<Writable>> rows = new ArrayList<>();
    rows.add(Arrays.asList((Writable) new NDArrayWritable(Nd4j.scalar(1).reshape(1,1))));

    List<List<Writable>> result = LocalTransformExecutor.execute(rows, process);
    assertFalse(result.isEmpty());
    List<Writable> first = result.get(0);
    assertNotNull(first);
    assertNotNull(first.get(0));
    assertNotNull(first.get(1));
    // "a" was incremented (1 + 2 = 3) and "b" appended by the transform.
    assertEquals(Nd4j.scalar(3).reshape(1, 1), ((NDArrayWritable) first.get(0)).get());
    assertEquals("hello world", first.get(1).toString());
}
 
Example #3
Source File: TestTransforms.java    From DataVec with Apache License 2.0 6 votes vote down vote up
@Test
public void testRemoveAllColumnsExceptForTransform() {
    Schema input = new Schema.Builder()
            .addColumnDouble("first")
            .addColumnString("second")
            .addColumnInteger("third")
            .addColumnLong("fourth")
            .build();

    Transform keepTwo = new RemoveAllColumnsExceptForTransform("second", "third");
    keepTwo.setInputSchema(input);
    Schema result = keepTwo.transform(input);

    // Only "second" (String) and "third" (Integer) survive, in original order.
    assertEquals(2, result.getColumnMetaData().size());
    TestCase.assertEquals(ColumnType.String, result.getMetaData(0).getColumnType());
    TestCase.assertEquals(ColumnType.Integer, result.getMetaData(1).getColumnType());

    List<Writable> row = Arrays.asList((Writable) new DoubleWritable(1.0), new Text("one"),
            new IntWritable(1), new LongWritable(1L));
    assertEquals(Arrays.asList(new Text("one"), new IntWritable(1)), keepTwo.map(row));
}
 
Example #4
Source File: TransformProcessRecordReaderTests.java    From DataVec with Apache License 2.0 6 votes vote down vote up
@Test
public void simpleTransformTestSequence() {
    // Three time steps, 100ms apart, starting 2016-01-01T00:00:00 UTC.
    long base = 1451606400000L;
    List<List<Writable>> sequence = new ArrayList<>();
    for (int step = 0; step < 3; step++) {
        sequence.add(Arrays.asList((Writable) new LongWritable(base + 100L * step),
                        new IntWritable(step), new IntWritable(0)));
    }

    Schema schema = new SequenceSchema.Builder()
                    .addColumnTime("timecolumn", DateTimeZone.UTC)
                    .addColumnInteger("intcolumn")
                    .addColumnInteger("intcolumn2")
                    .build();
    TransformProcess process = new TransformProcess.Builder(schema).removeColumns("intcolumn2").build();
    InMemorySequenceRecordReader source =
                    new InMemorySequenceRecordReader(Arrays.asList(sequence));
    TransformProcessSequenceRecordReader reader =
                    new TransformProcessSequenceRecordReader(source, process);

    // Dropping "intcolumn2" leaves two columns per time step.
    List<List<Writable>> transformed = reader.sequenceRecord();
    assertEquals(2, transformed.get(0).size());
}
 
Example #5
Source File: DuplicateColumnsTransform.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Override
public Schema transform(Schema inputSchema) {
    // Output schema: every original column, with a renamed clone inserted
    // immediately after each column selected for duplication.
    List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size() + newColumnNames.size());

    List<String> oldNames = inputSchema.getColumnNames();

    // newColumnNames is consumed in order: one name per duplicated column,
    // in the order the duplicated columns appear in the input schema.
    int dupCount = 0;
    for (int i = 0; i < oldMeta.size(); i++) {
        String current = oldNames.get(i);
        newMeta.add(oldMeta.get(i));

        if (columnsToDuplicateSet.contains(current)) {
            //Duplicate the current columnName, and place it after...
            String dupName = newColumnNames.get(dupCount);
            // Clone so renaming the copy does not mutate the input schema's metadata.
            ColumnMetaData m = oldMeta.get(i).clone();
            m.setName(dupName);
            newMeta.add(m);
            dupCount++;
        }
    }

    return inputSchema.newSchema(newMeta);
}
 
Example #6
Source File: TestConditions.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Test
public void testDoubleCondition() {
    Schema schema = TestTransforms.getSchema(ColumnType.Double);

    // GreaterOrEqual 0: true for 0.0 and 0.5, false for negatives.
    Condition geqZero = new DoubleColumnCondition("column", SequenceConditionMode.Or,
                    ConditionOp.GreaterOrEqual, 0);
    geqZero.setInputSchema(schema);
    assertTrue(geqZero.condition(Collections.singletonList((Writable) new DoubleWritable(0.0))));
    assertTrue(geqZero.condition(Collections.singletonList((Writable) new DoubleWritable(0.5))));
    assertFalse(geqZero.condition(Collections.singletonList((Writable) new DoubleWritable(-0.5))));
    assertFalse(geqZero.condition(Collections.singletonList((Writable) new DoubleWritable(-1))));

    // InSet {0.0, 3.0}: only exact set members match.
    Set<Double> members = new HashSet<>(Arrays.asList(0.0, 3.0));
    Condition inSet = new DoubleColumnCondition("column", SequenceConditionMode.Or,
                    ConditionOp.InSet, members);
    inSet.setInputSchema(schema);
    assertTrue(inSet.condition(Collections.singletonList((Writable) new DoubleWritable(0.0))));
    assertTrue(inSet.condition(Collections.singletonList((Writable) new DoubleWritable(3.0))));
    assertFalse(inSet.condition(Collections.singletonList((Writable) new DoubleWritable(1.0))));
    assertFalse(inSet.condition(Collections.singletonList((Writable) new DoubleWritable(2.0))));
}
 
Example #7
Source File: TestTransforms.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Test
public void testLongColumnsMathOpTransform() {
    Schema schema = new Schema.Builder()
            .addColumnLong("first")
            .addColumnString("second")
            .addColumnLong("third")
            .build();

    Transform add = new LongColumnsMathOpTransform("out", MathOp.Add, "first", "third");
    add.setInputSchema(schema);

    // The output schema appends a Long column "out" after the originals.
    Schema out = add.transform(schema);
    assertEquals(4, out.numColumns());
    assertEquals(Arrays.asList("first", "second", "third", "out"), out.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.Long, ColumnType.String, ColumnType.Long, ColumnType.Long),
            out.getColumnTypes());

    // 1 + 2 = 3
    List<Writable> rowA = Arrays.asList((Writable) new LongWritable(1), new Text("something"),
            new LongWritable(2));
    assertEquals(Arrays.asList((Writable) new LongWritable(1), new Text("something"),
            new LongWritable(2), new LongWritable(3)), add.map(rowA));

    // 100 + 21 = 121
    List<Writable> rowB = Arrays.asList((Writable) new LongWritable(100), new Text("something2"),
            new LongWritable(21));
    assertEquals(Arrays.asList((Writable) new LongWritable(100), new Text("something2"),
            new LongWritable(21), new LongWritable(121)), add.map(rowB));
}
 
Example #8
Source File: CustomerRetentionPredictionExample.java    From Java-Deep-Learning-Cookbook with MIT License 6 votes vote down vote up
private static Schema generateSchema(){
    // Column layout mirrors the customer-churn CSV in file order:
    // identifiers, demographics, account features, then the "Exited" label.
    return new Schema.Builder()
            .addColumnString("RowNumber")
            .addColumnInteger("CustomerId")
            .addColumnString("Surname")
            .addColumnInteger("CreditScore")
            .addColumnCategorical("Geography", Arrays.asList("France","Germany","Spain"))
            .addColumnCategorical("Gender", Arrays.asList("Male","Female"))
            .addColumnsInteger("Age", "Tenure")
            .addColumnDouble("Balance")
            .addColumnsInteger("NumOfProducts","HasCrCard","IsActiveMember")
            .addColumnDouble("EstimatedSalary")
            .addColumnInteger("Exited")
            .build();
}
 
Example #9
Source File: LocalTransformProcessRecordReaderTests.java    From DataVec with Apache License 2.0 6 votes vote down vote up
@Test
public void simpleTransformTestSequence() {
    // Three time steps, 100ms apart, starting 2016-01-01T00:00:00 UTC.
    final long start = 1451606400000L;
    List<List<Writable>> sequence = new ArrayList<>();
    sequence.add(Arrays.asList((Writable) new LongWritable(start), new IntWritable(0), new IntWritable(0)));
    sequence.add(Arrays.asList((Writable) new LongWritable(start + 100L), new IntWritable(1), new IntWritable(0)));
    sequence.add(Arrays.asList((Writable) new LongWritable(start + 200L), new IntWritable(2), new IntWritable(0)));

    Schema schema = new SequenceSchema.Builder()
            .addColumnTime("timecolumn", DateTimeZone.UTC)
            .addColumnInteger("intcolumn")
            .addColumnInteger("intcolumn2")
            .build();
    TransformProcess removeSecondInt = new TransformProcess.Builder(schema).removeColumns("intcolumn2").build();
    LocalTransformProcessSequenceRecordReader reader = new LocalTransformProcessSequenceRecordReader(
            new InMemorySequenceRecordReader(Arrays.asList(sequence)), removeSecondInt);

    // One column was removed, so each time step now has two values.
    List<List<Writable>> transformed = reader.sequenceRecord();
    assertEquals(2, transformed.get(0).size());
}
 
Example #10
Source File: CategoricalToOneHotTransform.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Override
public void setInputSchema(Schema inputSchema) {
    super.setInputSchema(inputSchema);

    columnIdx = inputSchema.getIndexOfColumn(columnName);
    ColumnMetaData meta = inputSchema.getMetaData(columnName);
    // One-hot expansion is only defined for categorical columns.
    if (!(meta instanceof CategoricalMetaData))
        throw new IllegalStateException("Cannot convert column \"" + columnName
                        + "\" from categorical to one-hot: column is not categorical (is: " + meta.getColumnType()
                        + ")");
    this.stateNames = ((CategoricalMetaData) meta).getStateNames();

    // Map each state name to its position; the position becomes the one-hot index.
    this.statesMap = new HashMap<>(stateNames.size());
    for (int i = 0; i < stateNames.size(); i++) {
        this.statesMap.put(stateNames.get(i), i);
    }
}
 
Example #11
Source File: TestTransforms.java    From DataVec with Apache License 2.0 6 votes vote down vote up
@Test
public void testStringToCategoricalTransform() {
    Schema schema = getSchema(ColumnType.String);

    List<String> states = Arrays.asList("zero", "one", "two");
    Transform transform = new StringToCategoricalTransform("column", states);
    transform.setInputSchema(schema);
    Schema out = transform.transform(schema);

    // Still a single column, now categorical with the supplied state names.
    assertEquals(1, out.getColumnMetaData().size());
    TestCase.assertEquals(ColumnType.Categorical, out.getMetaData(0).getColumnType());
    assertEquals(states, ((CategoricalMetaData) out.getMetaData(0)).getStateNames());

    // Values pass through unchanged; only the column type changes.
    for (String state : states) {
        assertEquals(Collections.singletonList((Writable) new Text(state)),
                transform.map(Collections.singletonList((Writable) new Text(state))));
    }
}
 
Example #12
Source File: ArrowConverterTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Test
public void testToArrayFromINDArray() {
    Schema schema = new Schema.Builder()
            .addColumnNDArray("outputArray", new long[]{1, 4})
            .build();

    // Four identical rows, each holding a 1x4 linspace [1, 2, 3, 4].
    final int numRows = 4;
    List<List<Writable>> records = new ArrayList<>(numRows);
    for (int row = 0; row < numRows; row++) {
        records.add(Arrays.<Writable>asList(new NDArrayWritable(Nd4j.linspace(1, 4, 4).reshape(1, 4))));
    }

    List<FieldVector> columns = ArrowConverter.toArrowColumns(bufferAllocator, schema, records);
    ArrowWritableRecordBatch batch = new ArrowWritableRecordBatch(columns, schema);
    INDArray array = ArrowConverter.toArray(batch);

    // The rows stack into a 4x4 matrix.
    assertArrayEquals(new long[]{4, 4}, array.shape());
    assertEquals(Nd4j.repeat(Nd4j.linspace(1, 4, 4), 4).reshape(4, 4), array);
}
 
Example #13
Source File: TestTransforms.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Test
public void testCategoricalToOneHotTransform() {
    Schema schema = getSchema(ColumnType.Categorical, "zero", "one", "two");

    Transform oneHot = new CategoricalToOneHotTransform("column");
    oneHot.setInputSchema(schema);
    Schema out = oneHot.transform(schema);

    // One integer column per category state, each restricted to {0, 1}.
    assertEquals(3, out.getColumnMetaData().size());
    for (int col = 0; col < 3; col++) {
        TestCase.assertEquals(ColumnType.Integer, out.getMetaData(col).getColumnType());
        IntegerMetaData meta = (IntegerMetaData) out.getMetaData(col);
        assertNotNull(meta.getMinAllowedValue());
        assertEquals(0, (int) meta.getMinAllowedValue());
        assertNotNull(meta.getMaxAllowedValue());
        assertEquals(1, (int) meta.getMaxAllowedValue());
    }

    // Each state maps to a one-hot row in state order.
    assertEquals(Arrays.asList(new IntWritable(1), new IntWritable(0), new IntWritable(0)),
            oneHot.map(Collections.singletonList((Writable) new Text("zero"))));
    assertEquals(Arrays.asList(new IntWritable(0), new IntWritable(1), new IntWritable(0)),
            oneHot.map(Collections.singletonList((Writable) new Text("one"))));
    assertEquals(Arrays.asList(new IntWritable(0), new IntWritable(0), new IntWritable(1)),
            oneHot.map(Collections.singletonList((Writable) new Text("two"))));
}
 
Example #14
Source File: TestTransforms.java    From DataVec with Apache License 2.0 6 votes vote down vote up
@Test
public void testLongColumnsMathOpTransform() {
    Schema inputSchema = new Schema.Builder()
            .addColumnLong("first").addColumnString("second").addColumnLong("third").build();

    Transform sum = new LongColumnsMathOpTransform("out", MathOp.Add, "first", "third");
    sum.setInputSchema(inputSchema);

    Schema outputSchema = sum.transform(inputSchema);
    assertEquals(4, outputSchema.numColumns());
    assertEquals(Arrays.asList("first", "second", "third", "out"), outputSchema.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.Long, ColumnType.String, ColumnType.Long, ColumnType.Long),
            outputSchema.getColumnTypes());

    // "out" = "first" + "third", appended to the row.
    List<Writable> in1 = Arrays.asList((Writable) new LongWritable(1), new Text("something"),
            new LongWritable(2));
    List<Writable> expected1 = Arrays.asList((Writable) new LongWritable(1), new Text("something"),
            new LongWritable(2), new LongWritable(3));
    assertEquals(expected1, sum.map(in1));

    List<Writable> in2 = Arrays.asList((Writable) new LongWritable(100), new Text("something2"),
            new LongWritable(21));
    List<Writable> expected2 = Arrays.asList((Writable) new LongWritable(100), new Text("something2"),
            new LongWritable(21), new LongWritable(121));
    assertEquals(expected2, sum.map(in2));
}
 
Example #15
Source File: LocalTransformExecutor.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Convert a string time series to
 * the proper writable set based on the schema.
 * Note that this does not use arrow.
 * This just uses normal writable objects.
 *
 * @param stringInput the string input
 * @param schema the schema to use
 * @return the converted records
 */
public static List<List<Writable>> convertStringInput(List<List<String>> stringInput,Schema schema) {
    List<List<Writable>> ret = new ArrayList<>(stringInput.size());
    for(int j = 0; j < stringInput.size(); j++) {
        List<String> record = stringInput.get(j);
        List<Writable> recordAdd = new ArrayList<>(record.size());
        for(int k = 0; k < record.size(); k++) {
            String value = record.get(k);
            // Parse each field according to the declared column type.
            switch(schema.getType(k)) {
                case Double: recordAdd.add(new DoubleWritable(Double.parseDouble(value))); break;
                case Float:  recordAdd.add(new FloatWritable(Float.parseFloat(value))); break;
                case Integer:  recordAdd.add(new IntWritable(Integer.parseInt(value))); break;
                case Long:  recordAdd.add(new LongWritable(Long.parseLong(value))); break;
                case String: recordAdd.add(new Text(value)); break;
                // Time values are expected as epoch-millisecond strings.
                case Time: recordAdd.add(new LongWritable(Long.parseLong(value))); break;
                // Other column types are skipped, matching the original behavior.
                default: break;
            }
        }

        // BUG FIX: the original accumulated rows into a separate local list
        // ("timeStepAdd") that was never returned, so this method always
        // returned an empty list. Rows now go into the returned list.
        ret.add(recordAdd);
    }

    return ret;
}
 
Example #16
Source File: TestPythonTransformProcess.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Test
public void testPythonTransformNoOutputSpecified() throws Exception {
    // With returnAllInputs(true), no explicit output schema is needed:
    // the transform returns the (modified) inputs plus any new variables.
    PythonTransform addTwo = PythonTransform.builder()
            .code("a += 2; b = 'hello world'")
            .returnAllInputs(true)
            .build();

    Schema schema = new Builder().addColumnInteger("a").build();
    TransformProcess process = new TransformProcess.Builder(schema)
            .transform(addTwo)
            .build();

    List<List<Writable>> rows = new ArrayList<>();
    rows.add(Arrays.asList((Writable) new IntWritable(1)));

    List<List<Writable>> result = LocalTransformExecutor.execute(rows, process);
    // "a" was incremented (1 + 2 = 3); "b" was appended by the python code.
    assertEquals(3, result.get(0).get(0).toInt());
    assertEquals("hello world", result.get(0).get(1).toString());
}
 
Example #17
Source File: TestTransforms.java    From DataVec with Apache License 2.0 6 votes vote down vote up
@Test
public void testAppendStringColumnTransform() {
    Schema schema = getSchema(ColumnType.String);

    Transform append = new AppendStringColumnTransform("column", "_AppendThis");
    append.setInputSchema(schema);
    Schema out = append.transform(schema);

    // Still a single String column.
    assertEquals(1, out.getColumnMetaData().size());
    TestCase.assertEquals(ColumnType.String, out.getMetaData(0).getColumnType());

    // The suffix is appended to every value.
    for (String value : new String[]{"one", "two", "three"}) {
        assertEquals(Collections.singletonList((Writable) new Text(value + "_AppendThis")),
                append.map(Collections.singletonList((Writable) new Text(value))));
    }
}
 
Example #18
Source File: TestTransforms.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Test
public void testDoubleColumnsMathOpTransform() {
    Schema schema = new Schema.Builder()
            .addColumnString("first")
            .addColumnDouble("second")
            .addColumnDouble("third")
            .build();

    Transform addColumns = new DoubleColumnsMathOpTransform("out", MathOp.Add, "second", "third");
    addColumns.setInputSchema(schema);

    // A new Double column "out" is appended after the existing three.
    Schema out = addColumns.transform(schema);
    assertEquals(4, out.numColumns());
    assertEquals(Arrays.asList("first", "second", "third", "out"), out.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.String, ColumnType.Double, ColumnType.Double, ColumnType.Double),
            out.getColumnTypes());

    // 1.0 + 2.1 = 3.1
    List<Writable> rowA = Arrays.asList((Writable) new Text("something"), new DoubleWritable(1.0),
            new DoubleWritable(2.1));
    assertEquals(Arrays.asList((Writable) new Text("something"), new DoubleWritable(1.0),
            new DoubleWritable(2.1), new DoubleWritable(3.1)), addColumns.map(rowA));

    // 100.0 + 21.1 = 121.1
    List<Writable> rowB = Arrays.asList((Writable) new Text("something2"), new DoubleWritable(100.0),
            new DoubleWritable(21.1));
    assertEquals(Arrays.asList((Writable) new Text("something2"), new DoubleWritable(100.0),
            new DoubleWritable(21.1), new DoubleWritable(121.1)), addColumns.map(rowB));
}
 
Example #19
Source File: BaseSequenceExpansionTransform.java    From DataVec with Apache License 2.0 6 votes vote down vote up
@Override
public Schema transform(Schema inputSchema) {
    //Same schema *except* for the expanded columns

    List<ColumnMetaData> meta = new ArrayList<>(inputSchema.numColumns());

    // Collect metadata for the columns to expand, in requiredColumns order.
    List<ColumnMetaData> oldMetaToExpand = new ArrayList<>();
    for(String s : requiredColumns){
        oldMetaToExpand.add(inputSchema.getMetaData(s));
    }
    // Subclass hook: decides the metadata of the expanded (replacement) columns.
    List<ColumnMetaData> newMetaToExpand = expandedColumnMetaDatas(oldMetaToExpand, expandedColumnNames);

    // Rebuild the schema in original column order, substituting the expanded
    // metadata for each required column as it is encountered.
    // NOTE(review): assumes newMetaToExpand is ordered consistently with the
    // required columns' order of appearance in the input schema — confirm.
    int modColumnIdx = 0;
    for(ColumnMetaData m : inputSchema.getColumnMetaData()){

        if(requiredColumns.contains(m.getName())){
            //Possibly changed column (expanded)
            meta.add(newMetaToExpand.get(modColumnIdx++));
        } else {
            //Unmodified column
            meta.add(m);
        }
    }

    return inputSchema.newSchema(meta);
}
 
Example #20
Source File: DuplicateColumnsTransform.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
@Override
public void setInputSchema(Schema inputSchema) {
    // Recompute the cached column indexes from scratch for the new schema.
    columnIndexesToDuplicateSet.clear();

    List<String> names = inputSchema.getColumnNames();
    for (String columnName : columnsToDuplicate) {
        int index = names.indexOf(columnName);
        if (index < 0) {
            throw new IllegalStateException("Invalid state: column to duplicate \"" + columnName
                            + "\" does not appear " + "in input schema");
        }
        columnIndexesToDuplicateSet.add(index);
    }

    this.inputSchema = inputSchema;
}
 
Example #21
Source File: TestTransforms.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Build a single-column test schema named "column" of the requested type.
 * For Categorical columns, {@code colNames} supplies the category state names;
 * it is ignored for all other types.
 *
 * @param type     the column type for the single column
 * @param colNames category state names (Categorical only)
 * @return a schema containing one column named "column"
 * @throws RuntimeException if the column type is not handled here
 */
public static Schema getSchema(ColumnType type, String... colNames) {

    Schema.Builder schema = new Schema.Builder();

    switch (type) {
        case String:
            schema.addColumnString("column");
            break;
        case Integer:
            schema.addColumnInteger("column");
            break;
        case Long:
            schema.addColumnLong("column");
            break;
        case Double:
            schema.addColumnDouble("column");
            break;
        case Categorical:
            schema.addColumnCategorical("column", colNames);
            break;
        case Time:
            schema.addColumnTime("column", DateTimeZone.UTC);
            break;
        default:
            // FIX: the original threw a bare RuntimeException with no message,
            // which gave no hint about which type was unsupported.
            throw new RuntimeException("Unsupported column type for test schema: " + type);
    }
    return schema.build();
}
 
Example #22
Source File: LegacyMappingHelper.java    From DataVec with Apache License 2.0 5 votes vote down vote up
// Lazily builds the legacy-name -> fully-qualified-class-name mapping on first use.
// NOTE(review): this lazy init is not synchronized; concurrent callers could each
// build and assign the map. Harmless here since the content is constant, but
// confirm if thread-safety is required.
private static Map<String,String> getLegacyMappingSchema(){
    if(mapSchema == null) {
        Map<String, String> m = new HashMap<>();
        m.put("Schema", Schema.class.getName());
        m.put("SequenceSchema", SequenceSchema.class.getName());

        mapSchema = m;
    }
    return mapSchema;
}
 
Example #23
Source File: TestTransforms.java    From DataVec with Apache License 2.0 5 votes vote down vote up
@Test
public void testReplaceStringTransform() {
    Schema schema = getSchema(ColumnType.String);

    // LinkedHashMap: replacement rules are applied in insertion order.
    Map<String, String> ordered = new LinkedHashMap<>();
    ordered.put("mid", "C2");
    ordered.put("\\d", "one");
    Transform transform = new ReplaceStringTransform("column", ordered);
    transform.setInputSchema(schema);
    Schema out = transform.transform(schema);

    assertEquals(1, out.getColumnMetaData().size());
    TestCase.assertEquals(ColumnType.String, out.getMetaData(0).getColumnType());

    // "mid" -> "C2" first, then every digit -> "one": B1midT3 -> BoneConeTone
    assertEquals(Collections.singletonList((Writable) new Text("BoneConeTone")),
            transform.map(Collections.singletonList((Writable) new Text("B1midT3"))));

    // A single rule needs no ordering guarantee: strip surrounding whitespace.
    Map<String, String> unordered = new HashMap<>();
    unordered.put("^\\s+|\\s+$", "");
    transform = new ReplaceStringTransform("column", unordered);
    transform.setInputSchema(schema);
    out = transform.transform(schema);

    assertEquals(1, out.getColumnMetaData().size());
    TestCase.assertEquals(ColumnType.String, out.getMetaData(0).getColumnType());

    assertEquals(Collections.singletonList((Writable) new Text("4.25")),
            transform.map(Collections.singletonList((Writable) new Text("  4.25 "))));
}
 
Example #24
Source File: CalculateSortedRank.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
@Override
public Schema transform(Schema inputSchema) {
    // Sorted rank is only defined for non-sequence data.
    if (inputSchema instanceof SequenceSchema)
        throw new IllegalStateException("Calculating sorted rank on sequences: not yet supported");

    List<ColumnMetaData> origMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> newMeta = new ArrayList<>(origMeta);

    // Append the rank column: a long with minimum 0 and no upper bound.
    newMeta.add(new LongMetaData(newColumnName, 0L, null));

    return inputSchema.newSchema(newMeta);
}
 
Example #25
Source File: Normalization.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Normalize sequence data to zero mean and unit variance, column-wise.
 *
 * @param schema         Schema of the data to normalize
 * @param sequence       Sequence data
 * @param excludeColumns List of columns to exclude from the normalization (may be null)
 * @return Normalized sequence
 */
public static JavaRDD<List<List<Writable>>> zeroMeanUnitVarianceSequence(Schema schema,
                JavaRDD<List<List<Writable>>> sequence, List<String> excludeColumns) {
    DataRowsFacade frame = DataFrames.toDataFrameSequence(schema, sequence);

    // The internal sequence UUID/index bookkeeping columns must never be normalized.
    List<String> excluded;
    if (excludeColumns == null) {
        excluded = Arrays.asList(DataFrames.SEQUENCE_UUID_COLUMN, DataFrames.SEQUENCE_INDEX_COLUMN);
    } else {
        excluded = new ArrayList<>(excludeColumns);
        excluded.add(DataFrames.SEQUENCE_UUID_COLUMN);
        excluded.add(DataFrames.SEQUENCE_INDEX_COLUMN);
    }

    return DataFrames.toRecordsSequence(zeromeanUnitVariance(frame, excluded)).getSecond();
}
 
Example #26
Source File: LocalTransformProcessRecordReaderTests.java    From DataVec with Apache License 2.0 5 votes vote down vote up
@Test
public void simpleTransformTest() throws Exception {
    // Five double columns named "0".."4"; the transform drops the first.
    Schema.Builder builder = new Schema.Builder();
    for (int i = 0; i < 5; i++) {
        builder.addColumnDouble(String.valueOf(i));
    }
    Schema schema = builder.build();
    TransformProcess dropFirst = new TransformProcess.Builder(schema).removeColumns("0").build();

    CSVRecordReader csv = new CSVRecordReader();
    csv.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));
    LocalTransformProcessRecordReader reader = new LocalTransformProcessRecordReader(csv, dropFirst);

    // Four of the five columns remain after the transform.
    assertEquals(4, reader.next().size());
}
 
Example #27
Source File: SequenceMovingWindowReduceTransform.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
@Override
public Schema transform(Schema inputSchema) {
    int colIdx = inputSchema.getIndexOfColumn(columnName);

    //Approach here: The reducer gives us a schema for one time step -> simply convert this to a sequence schema...
    List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> meta = new ArrayList<>(oldMeta);

    // The type of the appended moving-window column depends on the reduction op
    // (the case groups below intentionally fall through to a shared handler).
    ColumnMetaData m;
    switch (op) {
        case Min:
        case Max:
        case Range:
        case TakeFirst:
        case TakeLast:
            //Same type as input
            // Clone before renaming so the input schema's metadata is not mutated.
            m = oldMeta.get(colIdx);
            m = m.clone();
            m.setName(newColumnName);
            break;
        case Prod:
        case Sum:
        case Mean:
        case Stdev:
            //Double type
            m = new DoubleMetaData(newColumnName);
            break;
        case Count:
        case CountUnique:
            //Integer type
            m = new IntegerMetaData(newColumnName);
            break;
        default:
            throw new UnsupportedOperationException("Unknown op type: " + op);
    }
    meta.add(m);

    // Result is always a sequence schema: original columns plus the new one.
    return new SequenceSchema(meta);
}
 
Example #28
Source File: TestTransforms.java    From DataVec with Apache License 2.0 5 votes vote down vote up
@Test
public void testTextToCharacterIndexTransform(){

    Schema schema = new Schema.Builder().addColumnString("col").addColumnDouble("d").build();

    List<List<Writable>> input = Arrays.asList(
            Arrays.<Writable>asList(new Text("text"), new DoubleWritable(1.0)),
            Arrays.<Writable>asList(new Text("ab"), new DoubleWritable(2.0)));

    // Character vocabulary: each character maps to its index.
    Map<Character,Integer> vocab = new HashMap<>();
    vocab.put('a', 0);
    vocab.put('b', 1);
    vocab.put('e', 2);
    vocab.put('t', 3);
    vocab.put('x', 4);

    // Each input step expands into one step per character; the double column is copied along.
    List<List<Writable>> expected = Arrays.asList(
            Arrays.<Writable>asList(new IntWritable(3), new DoubleWritable(1.0)),   // 't'
            Arrays.<Writable>asList(new IntWritable(2), new DoubleWritable(1.0)),   // 'e'
            Arrays.<Writable>asList(new IntWritable(4), new DoubleWritable(1.0)),   // 'x'
            Arrays.<Writable>asList(new IntWritable(3), new DoubleWritable(1.0)),   // 't'
            Arrays.<Writable>asList(new IntWritable(0), new DoubleWritable(2.0)),   // 'a'
            Arrays.<Writable>asList(new IntWritable(1), new DoubleWritable(2.0)));  // 'b'

    Transform transform = new TextToCharacterIndexTransform("col", "newName", vocab, false);
    transform.setInputSchema(schema);

    Schema outputSchema = transform.transform(schema);
    assertEquals(2, outputSchema.getColumnNames().size());
    assertEquals(ColumnType.Integer, outputSchema.getType(0));
    assertEquals(ColumnType.Double, outputSchema.getType(1));

    // The index column's range matches the vocabulary's min/max indices.
    IntegerMetaData indexMeta = (IntegerMetaData) outputSchema.getMetaData(0);
    assertEquals(0, (int) indexMeta.getMinAllowedValue());
    assertEquals(4, (int) indexMeta.getMaxAllowedValue());

    assertEquals(expected, transform.mapSequence(input));
}
 
Example #29
Source File: ArrowWritableRecordBatch.java    From DataVec with Apache License 2.0 5 votes vote down vote up
// Wraps a set of Arrow column vectors as a record-batch view over a row range.
public ArrowWritableRecordBatch(List<FieldVector> list,Schema schema,int offset,int rows) {
    this.list = list;
    this.schema = schema;
    //each column should have same number of rows
    this.offset = offset; // starting row within the backing vectors
    this.size = rows; // number of rows in this batch view

}
 
Example #30
Source File: TestTransforms.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
@Test
public void testStringListToCategoricalSetTransform() {
    //Idea: String list to a set of categories... "a,c" for categories {a,b,c} -> "true","false","true"

    Schema schema = getSchema(ColumnType.String);

    List<String> categories = Arrays.asList("a", "b", "c");
    Transform transform = new StringListToCategoricalSetTransform("column", categories, categories, ",");
    transform.setInputSchema(schema);

    // One true/false categorical column per category.
    Schema out = transform.transform(schema);
    assertEquals(3, out.getColumnMetaData().size());
    for (int i = 0; i < 3; i++) {
        TestCase.assertEquals(ColumnType.Categorical, out.getType(i));
        assertEquals(Arrays.asList("true", "false"), ((CategoricalMetaData) out.getMetaData(i)).getStateNames());
    }

    // Each output column is "true" iff the corresponding category is present in the comma-separated input.
    assertEquals(Arrays.asList(new Text("false"), new Text("false"), new Text("false")),
            transform.map(Collections.singletonList((Writable) new Text(""))));
    assertEquals(Arrays.asList(new Text("true"), new Text("false"), new Text("false")),
            transform.map(Collections.singletonList((Writable) new Text("a"))));
    assertEquals(Arrays.asList(new Text("false"), new Text("true"), new Text("false")),
            transform.map(Collections.singletonList((Writable) new Text("b"))));
    assertEquals(Arrays.asList(new Text("false"), new Text("false"), new Text("true")),
            transform.map(Collections.singletonList((Writable) new Text("c"))));
    assertEquals(Arrays.asList(new Text("true"), new Text("false"), new Text("true")),
            transform.map(Collections.singletonList((Writable) new Text("a,c"))));
    assertEquals(Arrays.asList(new Text("true"), new Text("true"), new Text("true")),
            transform.map(Collections.singletonList((Writable) new Text("a,b,c"))));
}