Java Code Examples for org.datavec.api.transform.ColumnType#String

The following examples show how to use org.datavec.api.transform.ColumnType#String . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestGeoReduction.java    From DataVec with Apache License 2.0 5 votes vote down vote up
@Test
public void testCustomReductions() {

    // Four rows sharing one key; coordinates are encoded as "x#y" with "#" as delimiter
    List<List<Writable>> rows = new ArrayList<>();
    rows.add(Arrays.asList((Writable) new Text("someKey"), new Text("1#5")));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new Text("2#6")));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new Text("3#7")));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new Text("4#8")));

    // Coordinate-wise sum: 1+2+3+4 = 10.0 and 5+6+7+8 = 26.0
    List<Writable> expected = Arrays.asList((Writable) new Text("someKey"), new Text("10.0#26.0"));

    Schema schema = new Schema.Builder().addColumnString("key").addColumnString("coord").build();

    // Default op is Count, but the coord column is overridden by the custom coordinate-sum reduction
    Reducer reducer = new Reducer.Builder(ReduceOp.Count).keyColumns("key")
                    .customReduction("coord", new CoordinatesReduction("coordSum", ReduceOp.Sum, "#")).build();

    reducer.setInputSchema(schema);

    IAggregableReduceOp<List<Writable>, List<Writable>> agg = reducer.aggregableReducer();
    for (int r = 0; r < rows.size(); r++) {
        agg.accept(rows.get(r));
    }
    List<Writable> actual = agg.get();

    assertEquals(2, actual.size());
    assertEquals(expected, actual);

    // Output schema: the key column plus the custom reduction's String output column
    String[] expectedNames = {"key", "coordSum"};
    ColumnType[] expectedTypes = {ColumnType.String, ColumnType.String};
    Schema outSchema = reducer.transform(schema);

    assertEquals(2, outSchema.numColumns());
    for (int c = 0; c < 2; c++) {
        assertEquals(expectedNames[c], outSchema.getName(c));
        assertEquals(expectedTypes[c], outSchema.getType(c));
    }
}
 
Example 2
Source File: StringListToCountsNDArrayTransform.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Transform the input schema: the single String column identified by {@code columnName}
 * is replaced with one NDArray column of width {@code vocabulary.size()}; every other
 * column passes through unchanged.
 *
 * @param inputSchema schema containing the String column to replace
 * @return a new schema with the String column replaced by an NDArray column
 * @throws IllegalStateException if the target column is not a String column
 */
@Override
public Schema transform(Schema inputSchema) {

    int colIdx = inputSchema.getIndexOfColumn(columnName);

    List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData();
    // Same number of columns in and out: the one String column maps to one NDArray column
    List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size());

    int i = 0;
    for (ColumnMetaData t : oldMeta) {
        if (i++ == colIdx) {
            //Replace String column with a set of binary/integer columns
            if (t.getColumnType() != ColumnType.String)
                throw new IllegalStateException("Cannot convert non-string type");

            ColumnMetaData meta = new NDArrayMetaData(newColumnName, new long[] {vocabulary.size()});
            newMeta.add(meta);
        } else {
            newMeta.add(t);
        }
    }

    return inputSchema.newSchema(newMeta);

}
 
Example 3
Source File: StringListToCategoricalSetTransform.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Transform the input schema: the single String column identified by {@code columnName}
 * is replaced with one true/false categorical column per entry in {@code newColumnNames};
 * every other column passes through unchanged.
 *
 * @param inputSchema schema containing the String column to replace
 * @return a new schema with the String column expanded into categorical columns
 * @throws IllegalStateException if the target column is not a String column
 */
@Override
public Schema transform(Schema inputSchema) {

    int colIdx = inputSchema.getIndexOfColumn(columnName);

    List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData();
    // One String column out, newColumnNames.size() categorical columns in its place
    List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size() + newColumnNames.size() - 1);

    int i = 0;
    for (ColumnMetaData t : oldMeta) {
        if (i++ == colIdx) {
            //Replace String column with a set of binary/categorical columns
            if (t.getColumnType() != ColumnType.String)
                throw new IllegalStateException("Cannot convert non-string type");

            for (int j = 0; j < newColumnNames.size(); j++) {
                ColumnMetaData meta = new CategoricalMetaData(newColumnNames.get(j), "true", "false");
                newMeta.add(meta);
            }
        } else {
            newMeta.add(t);
        }
    }

    return inputSchema.newSchema(newMeta);

}
 
Example 4
Source File: ParseDoubleTransform.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Get the output schema for this transformation, given an input schema.
 * Every String column becomes a Double column with the same name; all other
 * columns are copied through unchanged.
 *
 * @param inputSchema the schema to transform
 */
@Override
public Schema transform(Schema inputSchema) {
    Schema.Builder builder = new Schema.Builder();
    for (int col = 0; col < inputSchema.numColumns(); col++) {
        ColumnMetaData meta = inputSchema.getMetaData(col);
        if (inputSchema.getType(col) == ColumnType.String) {
            // String column is reinterpreted as a Double column, keeping its name
            builder.addColumnDouble(meta.getName());
        } else {
            builder.addColumn(meta);
        }
    }
    return builder.build();
}
 
Example 5
Source File: TestGeoReduction.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
@Test
public void testCustomReductions() {

    // Four rows sharing one key; coordinates are encoded as "x#y" with "#" as delimiter
    List<List<Writable>> rows = new ArrayList<>();
    rows.add(Arrays.asList((Writable) new Text("someKey"), new Text("1#5")));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new Text("2#6")));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new Text("3#7")));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new Text("4#8")));

    // Coordinate-wise sum: 1+2+3+4 = 10.0 and 5+6+7+8 = 26.0
    List<Writable> expected = Arrays.asList((Writable) new Text("someKey"), new Text("10.0#26.0"));

    Schema schema = new Schema.Builder().addColumnString("key").addColumnString("coord").build();

    // Default op is Count, but the coord column is overridden by the custom coordinate-sum reduction
    Reducer reducer = new Reducer.Builder(ReduceOp.Count).keyColumns("key")
                    .customReduction("coord", new CoordinatesReduction("coordSum", ReduceOp.Sum, "#")).build();

    reducer.setInputSchema(schema);

    IAggregableReduceOp<List<Writable>, List<Writable>> agg = reducer.aggregableReducer();
    for (int r = 0; r < rows.size(); r++) {
        agg.accept(rows.get(r));
    }
    List<Writable> actual = agg.get();

    assertEquals(2, actual.size());
    assertEquals(expected, actual);

    // Output schema: the key column plus the custom reduction's String output column
    String[] expectedNames = {"key", "coordSum"};
    ColumnType[] expectedTypes = {ColumnType.String, ColumnType.String};
    Schema outSchema = reducer.transform(schema);

    assertEquals(2, outSchema.numColumns());
    for (int c = 0; c < 2; c++) {
        assertEquals(expectedNames[c], outSchema.getName(c));
        assertEquals(expectedTypes[c], outSchema.getType(c));
    }
}
 
Example 6
Source File: StringListToCountsNDArrayTransform.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Transform the input schema: the single String column identified by {@code columnName}
 * is replaced with one NDArray column of width {@code vocabulary.size()}; every other
 * column passes through unchanged.
 *
 * @param inputSchema schema containing the String column to replace
 * @return a new schema with the String column replaced by an NDArray column
 * @throws IllegalStateException if the target column is not a String column
 */
@Override
public Schema transform(Schema inputSchema) {

    int colIdx = inputSchema.getIndexOfColumn(columnName);

    List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData();
    // Same number of columns in and out: the one String column maps to one NDArray column
    List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size());

    int i = 0;
    for (ColumnMetaData t : oldMeta) {
        if (i++ == colIdx) {
            //Replace String column with a set of binary/integer columns
            if (t.getColumnType() != ColumnType.String)
                throw new IllegalStateException("Cannot convert non-string type");

            ColumnMetaData meta = new NDArrayMetaData(newColumnName, new long[] {vocabulary.size()});
            newMeta.add(meta);
        } else {
            newMeta.add(t);
        }
    }

    return inputSchema.newSchema(newMeta);

}
 
Example 7
Source File: StringListToCategoricalSetTransform.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Transform the input schema: the single String column identified by {@code columnName}
 * is replaced with one true/false categorical column per entry in {@code newColumnNames};
 * every other column passes through unchanged.
 *
 * @param inputSchema schema containing the String column to replace
 * @return a new schema with the String column expanded into categorical columns
 * @throws IllegalStateException if the target column is not a String column
 */
@Override
public Schema transform(Schema inputSchema) {

    int colIdx = inputSchema.getIndexOfColumn(columnName);

    List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData();
    // One String column out, newColumnNames.size() categorical columns in its place
    List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size() + newColumnNames.size() - 1);

    int i = 0;
    for (ColumnMetaData t : oldMeta) {
        if (i++ == colIdx) {
            //Replace String column with a set of binary/categorical columns
            if (t.getColumnType() != ColumnType.String)
                throw new IllegalStateException("Cannot convert non-string type");

            for (int j = 0; j < newColumnNames.size(); j++) {
                ColumnMetaData meta = new CategoricalMetaData(newColumnNames.get(j), "true", "false");
                newMeta.add(meta);
            }
        } else {
            newMeta.add(t);
        }
    }

    return inputSchema.newSchema(newMeta);

}
 
Example 8
Source File: ParseDoubleTransform.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Get the output schema for this transformation, given an input schema.
 * Every String column becomes a Double column with the same name; all other
 * columns are copied through unchanged.
 *
 * @param inputSchema the schema to transform
 */
@Override
public Schema transform(Schema inputSchema) {
    Schema.Builder builder = new Schema.Builder();
    for (int col = 0; col < inputSchema.numColumns(); col++) {
        ColumnMetaData meta = inputSchema.getMetaData(col);
        if (inputSchema.getType(col) == ColumnType.String) {
            // String column is reinterpreted as a Double column, keeping its name
            builder.addColumnDouble(meta.getName());
        } else {
            builder.addColumn(meta);
        }
    }
    return builder.build();
}
 
Example 9
Source File: StringMetaData.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * @return the column type this metadata describes — always {@link ColumnType#String}
 */
@Override
public ColumnType getColumnType() {
    return ColumnType.String;
}
 
Example 10
Source File: StringAnalysis.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * @return the type of column this analysis applies to — always {@link ColumnType#String}
 */
@Override
public ColumnType getColumnType() {
    return ColumnType.String;
}
 
Example 11
Source File: TestMultiOpReduce.java    From DataVec with Apache License 2.0 4 votes vote down vote up
@Test
public void testCustomReductions() {

    // Rows share a single key; columns are: key, intCol, textCol, doubleCol
    List<List<Writable>> rows = new ArrayList<>();
    rows.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(1), new Text("zero"),
                    new DoubleWritable(0)));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(2), new Text("one"),
                    new DoubleWritable(1)));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(3), new Text("two"),
                    new DoubleWritable(2)));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(4), new Text("three"),
                    new DoubleWritable(3)));

    // Sum over intCol (1+2+3+4=10); custom "take second" reductions keep the second row's values
    List<Writable> expected = Arrays.asList((Writable) new Text("someKey"), new IntWritable(10), new Text("one"),
                    new DoubleWritable(1));


    // NOTE: doubleCol is deliberately declared here as a String column
    Schema schema = new Schema.Builder().addColumnString("key").addColumnInteger("intCol")
                    .addColumnString("textCol").addColumnString("doubleCol").build();

    Reducer reducer = new Reducer.Builder(ReduceOp.Sum).keyColumns("key")
                    .customReduction("textCol", new CustomReduceTakeSecond())
                    .customReduction("doubleCol", new CustomReduceTakeSecond()).build();

    reducer.setInputSchema(schema);


    IAggregableReduceOp<List<Writable>, List<Writable>> agg = reducer.aggregableReducer();

    for (List<Writable> row : rows) {
        agg.accept(row);
    }
    List<Writable> actual = agg.get();

    assertEquals(4, actual.size());
    assertEquals(expected, actual);

    // Output schema: key passes through; reduced columns are renamed by their ops
    String[] expectedNames = {"key", "sum(intCol)", "myCustomReduce(textCol)", "myCustomReduce(doubleCol)"};
    ColumnType[] expectedTypes =
                    {ColumnType.String, ColumnType.Integer, ColumnType.String, ColumnType.String};
    Schema outSchema = reducer.transform(schema);

    assertEquals(4, outSchema.numColumns());
    for (int c = 0; c < 4; c++) {
        assertEquals(expectedNames[c], outSchema.getName(c));
        assertEquals(expectedTypes[c], outSchema.getType(c));
    }
}
 
Example 12
Source File: TestMultiOpReduce.java    From DataVec with Apache License 2.0 4 votes vote down vote up
@Test
public void testCustomReductionsWithCondition() {

    // Rows share a single key; columns are: key, intCol, textCol, doubleCol
    List<List<Writable>> rows = new ArrayList<>();
    rows.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(1), new Text("zero"),
            new DoubleWritable(0)));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(2), new Text("one"),
            new DoubleWritable(1)));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(3), new Text("two"),
            new DoubleWritable(2)));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(4), new Text("three"),
            new DoubleWritable(3)));

    // Conditional count: 3 of 4 textCol values are != "three"
    List<Writable> expected = Arrays.asList((Writable) new Text("someKey"), new IntWritable(10), new IntWritable(3),
            new DoubleWritable(1));


    // NOTE: doubleCol is deliberately declared here as a String column
    Schema schema = new Schema.Builder().addColumnString("key").addColumnInteger("intCol")
            .addColumnString("textCol").addColumnString("doubleCol").build();

    Reducer reducer = new Reducer.Builder(ReduceOp.Sum).keyColumns("key")
            .conditionalReduction("textCol", "condTextCol",
                    ReduceOp.Count, new StringColumnCondition("textCol", ConditionOp.NotEqual, "three"))
            .customReduction("doubleCol", new CustomReduceTakeSecond()).build();

    reducer.setInputSchema(schema);


    IAggregableReduceOp<List<Writable>, List<Writable>> agg = reducer.aggregableReducer();

    for (List<Writable> row : rows) {
        agg.accept(row);
    }
    List<Writable> actual = agg.get();

    assertEquals(4, actual.size());
    assertEquals(expected, actual);

    // Output schema: conditional count yields a Long column with the supplied name
    String[] expectedNames = {"key", "sum(intCol)", "condTextCol", "myCustomReduce(doubleCol)"};
    ColumnType[] expectedTypes =
            {ColumnType.String, ColumnType.Integer, ColumnType.Long, ColumnType.String};
    Schema outSchema = reducer.transform(schema);

    assertEquals(4, outSchema.numColumns());
    for (int c = 0; c < 4; c++) {
        assertEquals(expectedNames[c], outSchema.getName(c));
        assertEquals(expectedTypes[c], outSchema.getType(c));
    }
}
 
Example 13
Source File: StringMetaData.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * @return the column type this metadata describes — always {@link ColumnType#String}
 */
@Override
public ColumnType getColumnType() {
    return ColumnType.String;
}
 
Example 14
Source File: StringAnalysis.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * @return the type of column this analysis applies to — always {@link ColumnType#String}
 */
@Override
public ColumnType getColumnType() {
    return ColumnType.String;
}
 
Example 15
Source File: TestMultiOpReduce.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
public void testCustomReductions() {

    // Rows share a single key; columns are: key, intCol, textCol, doubleCol
    List<List<Writable>> rows = new ArrayList<>();
    rows.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(1), new Text("zero"),
                    new DoubleWritable(0)));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(2), new Text("one"),
                    new DoubleWritable(1)));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(3), new Text("two"),
                    new DoubleWritable(2)));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(4), new Text("three"),
                    new DoubleWritable(3)));

    // Sum over intCol (1+2+3+4=10); custom "take second" reductions keep the second row's values
    List<Writable> expected = Arrays.asList((Writable) new Text("someKey"), new IntWritable(10), new Text("one"),
                    new DoubleWritable(1));


    // NOTE: doubleCol is deliberately declared here as a String column
    Schema schema = new Schema.Builder().addColumnString("key").addColumnInteger("intCol")
                    .addColumnString("textCol").addColumnString("doubleCol").build();

    Reducer reducer = new Reducer.Builder(ReduceOp.Sum).keyColumns("key")
                    .customReduction("textCol", new CustomReduceTakeSecond())
                    .customReduction("doubleCol", new CustomReduceTakeSecond()).build();

    reducer.setInputSchema(schema);


    IAggregableReduceOp<List<Writable>, List<Writable>> agg = reducer.aggregableReducer();

    for (List<Writable> row : rows) {
        agg.accept(row);
    }
    List<Writable> actual = agg.get();

    assertEquals(4, actual.size());
    assertEquals(expected, actual);

    // Output schema: key passes through; reduced columns are renamed by their ops
    String[] expectedNames = {"key", "sum(intCol)", "myCustomReduce(textCol)", "myCustomReduce(doubleCol)"};
    ColumnType[] expectedTypes =
                    {ColumnType.String, ColumnType.Integer, ColumnType.String, ColumnType.String};
    Schema outSchema = reducer.transform(schema);

    assertEquals(4, outSchema.numColumns());
    for (int c = 0; c < 4; c++) {
        assertEquals(expectedNames[c], outSchema.getName(c));
        assertEquals(expectedTypes[c], outSchema.getType(c));
    }
}
 
Example 16
Source File: TestMultiOpReduce.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
public void testCustomReductionsWithCondition() {

    // Rows share a single key; columns are: key, intCol, textCol, doubleCol
    List<List<Writable>> rows = new ArrayList<>();
    rows.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(1), new Text("zero"),
            new DoubleWritable(0)));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(2), new Text("one"),
            new DoubleWritable(1)));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(3), new Text("two"),
            new DoubleWritable(2)));
    rows.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(4), new Text("three"),
            new DoubleWritable(3)));

    // Conditional count: 3 of 4 textCol values are != "three"
    List<Writable> expected = Arrays.asList((Writable) new Text("someKey"), new IntWritable(10), new IntWritable(3),
            new DoubleWritable(1));


    // NOTE: doubleCol is deliberately declared here as a String column
    Schema schema = new Schema.Builder().addColumnString("key").addColumnInteger("intCol")
            .addColumnString("textCol").addColumnString("doubleCol").build();

    Reducer reducer = new Reducer.Builder(ReduceOp.Sum).keyColumns("key")
            .conditionalReduction("textCol", "condTextCol",
                    ReduceOp.Count, new StringColumnCondition("textCol", ConditionOp.NotEqual, "three"))
            .customReduction("doubleCol", new CustomReduceTakeSecond()).build();

    reducer.setInputSchema(schema);


    IAggregableReduceOp<List<Writable>, List<Writable>> agg = reducer.aggregableReducer();

    for (List<Writable> row : rows) {
        agg.accept(row);
    }
    List<Writable> actual = agg.get();

    assertEquals(4, actual.size());
    assertEquals(expected, actual);

    // Output schema: conditional count yields a Long column with the supplied name
    String[] expectedNames = {"key", "sum(intCol)", "condTextCol", "myCustomReduce(doubleCol)"};
    ColumnType[] expectedTypes =
            {ColumnType.String, ColumnType.Integer, ColumnType.Long, ColumnType.String};
    Schema outSchema = reducer.transform(schema);

    assertEquals(4, outSchema.numColumns());
    for (int c = 0; c < 4; c++) {
        assertEquals(expectedNames[c], outSchema.getName(c));
        assertEquals(expectedTypes[c], outSchema.getType(c));
    }
}