org.datavec.api.transform.ColumnType Java Examples

The following examples show how to use org.datavec.api.transform.ColumnType. They are drawn from open-source projects; the source file, project, and license are noted above each example.
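As a quick orientation before the examples, here is a minimal sketch (column names are illustrative, not taken from any example below) that builds a DataVec Schema and reads back the ColumnType of each column:

import org.datavec.api.transform.ColumnType;
import org.datavec.api.transform.schema.Schema;

public class ColumnTypeDemo {
    public static void main(String[] args) {
        // Build a simple schema with three differently typed columns
        Schema schema = new Schema.Builder()
                .addColumnInteger("age")
                .addColumnDouble("weight")
                .addColumnString("name")
                .build();

        // Inspect the ColumnType of each column
        for (int i = 0; i < schema.numColumns(); i++) {
            ColumnType type = schema.getType(i);   // e.g. ColumnType.Integer for "age"
            System.out.println(schema.getName(i) + " -> " + type);
        }
    }
}

The examples that follow exercise ColumnType in column conditions, transforms, schema printing, analysis, and Arrow conversion.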
Example #1
Source File: TestConditions.java    From DataVec with Apache License 2.0
@Test
public void testIntegerCondition() {
    Schema schema = TestTransforms.getSchema(ColumnType.Integer);

    Condition condition = new IntegerColumnCondition("column", SequenceConditionMode.Or, ConditionOp.LessThan, 0);
    condition.setInputSchema(schema);

    assertTrue(condition.condition(Collections.singletonList((Writable) new IntWritable(-1))));
    assertTrue(condition.condition(Collections.singletonList((Writable) new IntWritable(-2))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new IntWritable(0))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new IntWritable(1))));

    Set<Integer> set = new HashSet<>();
    set.add(0);
    set.add(3);
    condition = new IntegerColumnCondition("column", SequenceConditionMode.Or, ConditionOp.InSet, set);
    condition.setInputSchema(schema);
    assertTrue(condition.condition(Collections.singletonList((Writable) new IntWritable(0))));
    assertTrue(condition.condition(Collections.singletonList((Writable) new IntWritable(3))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new IntWritable(1))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new IntWritable(2))));
}
 
Example #2
Source File: TestConditions.java    From deeplearning4j with Apache License 2.0
@Test
public void testIntegerCondition() {
    Schema schema = TestTransforms.getSchema(ColumnType.Integer);

    Condition condition = new IntegerColumnCondition("column", SequenceConditionMode.Or, ConditionOp.LessThan, 0);
    condition.setInputSchema(schema);

    assertTrue(condition.condition(Collections.singletonList((Writable) new IntWritable(-1))));
    assertTrue(condition.condition(Collections.singletonList((Writable) new IntWritable(-2))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new IntWritable(0))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new IntWritable(1))));

    Set<Integer> set = new HashSet<>();
    set.add(0);
    set.add(3);
    condition = new IntegerColumnCondition("column", SequenceConditionMode.Or, ConditionOp.InSet, set);
    condition.setInputSchema(schema);
    assertTrue(condition.condition(Collections.singletonList((Writable) new IntWritable(0))));
    assertTrue(condition.condition(Collections.singletonList((Writable) new IntWritable(3))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new IntWritable(1))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new IntWritable(2))));
}
 
Example #3
Source File: ArrowUtils.java    From konduit-serving with Apache License 2.0
public static INDArray convertArrowVector(FieldVector fieldVector, ColumnType type) {
    DataBuffer buffer = null;
    int cols = fieldVector.getValueCount();
    ByteBuffer direct = ByteBuffer.allocateDirect(fieldVector.getDataBuffer().capacity());
    direct.order(ByteOrder.nativeOrder());
    fieldVector.getDataBuffer().getBytes(0, direct);
    direct.rewind();
    switch (type) {
        case Integer:
            buffer = Nd4j.createBuffer(direct, DataType.INT32, cols, 0L);
            break;
        case Float:
            buffer = Nd4j.createBuffer(direct, DataType.FLOAT, cols);
            break;
        case Double:
            buffer = Nd4j.createBuffer(direct, DataType.DOUBLE, cols);
            break;
        case Long:
            buffer = Nd4j.createBuffer(direct, DataType.INT64, cols);
    }

    return Nd4j.create(buffer, cols, 1);
}
 
Example #4
Source File: ArrowUtils.java    From konduit-serving with Apache License 2.0
public static Field getFieldForColumn(String name, ColumnType columnType) {
    switch (columnType) {
        case Integer:
            return field(name, new ArrowType.Int(32, false));
        case Float:
            return field(name, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
        case Double:
            return field(name, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
        case Long:
            return field(name, new ArrowType.Int(64, false));
        case NDArray:
            return field(name, new ArrowType.Binary());
        case Boolean:
            return field(name, new ArrowType.Bool());
        case Categorical:
            return field(name, new ArrowType.Utf8());
        case Time:
            return field(name, new ArrowType.Date(DateUnit.MILLISECOND));
        case Bytes:
            return field(name, new ArrowType.Binary());
        case String:
            return field(name, new ArrowType.Utf8());
        default:
            throw new IllegalArgumentException("Column type invalid " + columnType);
    }
}
 
Example #5
Source File: DataAnalysis.java    From deeplearning4j with Apache License 2.0
@Deprecated //Legacy format, no longer used
private Map<String, List<Map<String, Object>>> getJsonRepresentation() {
    Map<String, List<Map<String, Object>>> jsonRepresentation = new LinkedHashMap<>();
    List<Map<String, Object>> list = new ArrayList<>();
    jsonRepresentation.put("DataAnalysis", list);

    for (String colName : schema.getColumnNames()) {
        Map<String, Object> current = new LinkedHashMap<>();
        int idx = schema.getIndexOfColumn(colName);
        current.put(COL_NAME, colName);
        current.put(COL_IDX, idx);
        ColumnType columnType = schema.getMetaData(colName).getColumnType();
        current.put(COL_TYPE, columnType);
        if (columnType == ColumnType.Categorical) {
            current.put(CATEGORICAL_STATE_NAMES,
                            ((CategoricalMetaData) schema.getMetaData(colName)).getStateNames());
        }
        current.put(ANALYSIS, Collections.singletonMap(columnAnalysis.get(idx).getClass().getSimpleName(),
                        columnAnalysis.get(idx)));

        list.add(current);
    }

    return jsonRepresentation;
}
 
Example #6
Source File: Schema.java    From deeplearning4j with Apache License 2.0
@Override
public String toString() {
    StringBuilder sb = new StringBuilder();
    int nCol = numColumns();

    int maxNameLength = 0;
    for (String s : getColumnNames()) {
        maxNameLength = Math.max(maxNameLength, s.length());
    }

    //Header:
    sb.append("Schema():\n");
    sb.append(String.format("%-6s", "idx")).append(String.format("%-" + (maxNameLength + 8) + "s", "name"))
                    .append(String.format("%-15s", "type")).append("meta data").append("\n");

    for (int i = 0; i < nCol; i++) {
        String colName = getName(i);
        ColumnType type = getType(i);
        ColumnMetaData meta = getMetaData(i);
        String paddedName = String.format("%-" + (maxNameLength + 8) + "s", "\"" + colName + "\"");
        sb.append(String.format("%-6d", i)).append(paddedName).append(String.format("%-15s", type)).append(meta)
                        .append("\n");
    }

    return sb.toString();
}
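For context, a brief usage sketch (the schema contents are illustrative): printing a schema invokes the toString() above and produces the idx/name/type/meta data table.

import org.datavec.api.transform.schema.Schema;

public class SchemaPrintDemo {
    public static void main(String[] args) {
        Schema schema = new Schema.Builder()
                .addColumnInteger("id")
                .addColumnCategorical("label", "cat", "dog")
                .addColumnDouble("score")
                .build();

        // Prints the formatted table produced by Schema.toString()
        System.out.println(schema);
    }
}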
 
Example #7
Source File: TestSchemaMethods.java    From deeplearning4j with Apache License 2.0
@Test
public void testNumberedColumnAdding() {

    Schema schema = new Schema.Builder().addColumnsDouble("doubleCol_%d", 0, 2).addColumnsLong("longCol_%d", 3, 5)
                    .addColumnsString("stringCol_%d", 6, 8).build();

    assertEquals(9, schema.numColumns());

    for (int i = 0; i < 9; i++) {
        if (i <= 2) {
            assertEquals("doubleCol_" + i, schema.getName(i));
            assertEquals(ColumnType.Double, schema.getType(i));
        } else if (i <= 5) {
            assertEquals("longCol_" + i, schema.getName(i));
            assertEquals(ColumnType.Long, schema.getType(i));
        } else {
            assertEquals("stringCol_" + i, schema.getName(i));
            assertEquals(ColumnType.String, schema.getType(i));
        }
    }

}
 
Example #8
Source File: ArrowConverter.java    From DataVec with Apache License 2.0
/**
 * Create a field given the input {@link ColumnType} and name.
 *
 * @param name the name of the field
 * @param columnType the column type to map to an Arrow type
 * @return the Arrow field for the given name and column type
 */
public static Field getFieldForColumn(String name,ColumnType columnType) {
    switch(columnType) {
        case Long: return field(name,new ArrowType.Int(64,false));
        case Integer: return field(name,new ArrowType.Int(32,false));
        case Double: return field(name,new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
        case Float: return field(name,new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
        case Boolean: return field(name, new ArrowType.Bool());
        case Categorical: return field(name,new ArrowType.Utf8());
        case Time: return field(name,new ArrowType.Date(DateUnit.MILLISECOND));
        case Bytes: return field(name,new ArrowType.Binary());
        case NDArray: return field(name,new ArrowType.Binary());
        case String: return field(name,new ArrowType.Utf8());

        default: throw new IllegalArgumentException("Column type invalid " + columnType);
    }
}
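A hypothetical usage sketch for the method above. It assumes ArrowConverter lives in org.datavec.arrow (as in the DataVec source tree) and that Arrow's pojo classes are on the classpath; the column names are illustrative.

import java.util.Arrays;

import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;   // Arrow's Schema, not the DataVec one
import org.datavec.api.transform.ColumnType;
import org.datavec.arrow.ArrowConverter;

public class GetFieldDemo {
    public static void main(String[] args) {
        // Map two DataVec column types to Arrow fields
        Field priceField = ArrowConverter.getFieldForColumn("price", ColumnType.Double);
        Field labelField = ArrowConverter.getFieldForColumn("label", ColumnType.Categorical);

        // Assemble the fields into an Arrow schema
        Schema arrowSchema = new Schema(Arrays.asList(priceField, labelField));
        System.out.println(arrowSchema);
    }
}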
 
Example #9
Source File: TestGeoTransforms.java    From deeplearning4j with Apache License 2.0
@Test
public void testCoordinatesDistanceTransform() throws Exception {
    Schema schema = new Schema.Builder().addColumnString("point").addColumnString("mean").addColumnString("stddev")
                    .build();

    Transform transform = new CoordinatesDistanceTransform("dist", "point", "mean", "stddev", "\\|");
    transform.setInputSchema(schema);

    Schema out = transform.transform(schema);
    assertEquals(4, out.numColumns());
    assertEquals(Arrays.asList("point", "mean", "stddev", "dist"), out.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.String, ColumnType.String, ColumnType.String, ColumnType.Double),
                    out.getColumnTypes());

    assertEquals(Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"), new DoubleWritable(5.0)),
                    transform.map(Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"))));
    assertEquals(Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"), new Text("10|5"),
                    new DoubleWritable(Math.sqrt(160))),
                    transform.map(Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"),
                                    new Text("10|5"))));
}
 
Example #10
Source File: TestConditions.java    From deeplearning4j with Apache License 2.0
@Test
public void testStringRegexCondition() {

    Schema schema = TestTransforms.getSchema(ColumnType.String);

    //Condition: String value starts with "abc"
    Condition condition = new StringRegexColumnCondition("column", "abc.*");
    condition.setInputSchema(schema);

    assertTrue(condition.condition(Collections.singletonList((Writable) new Text("abc"))));
    assertTrue(condition.condition(Collections.singletonList((Writable) new Text("abcdefghijk"))));
    assertTrue(condition.condition(Collections.singletonList((Writable) new Text("abc more text \tetc"))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new Text("ab"))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new Text("also doesn't match"))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new Text(" abc"))));

    //Check application on non-String columns
    schema = TestTransforms.getSchema(ColumnType.Integer);
    condition = new StringRegexColumnCondition("column", "123\\d*");
    condition.setInputSchema(schema);

    assertTrue(condition.condition(Collections.singletonList((Writable) new IntWritable(123))));
    assertTrue(condition.condition(Collections.singletonList((Writable) new IntWritable(123456))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new IntWritable(-123))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new IntWritable(456789))));
}
 
Example #11
Source File: TestGeoTransforms.java    From DataVec with Apache License 2.0
@Test
public void testCoordinatesDistanceTransform() throws Exception {
    Schema schema = new Schema.Builder().addColumnString("point").addColumnString("mean").addColumnString("stddev")
                    .build();

    Transform transform = new CoordinatesDistanceTransform("dist", "point", "mean", "stddev", "\\|");
    transform.setInputSchema(schema);

    Schema out = transform.transform(schema);
    assertEquals(4, out.numColumns());
    assertEquals(Arrays.asList("point", "mean", "stddev", "dist"), out.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.String, ColumnType.String, ColumnType.String, ColumnType.Double),
                    out.getColumnTypes());

    assertEquals(Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"), new DoubleWritable(5.0)),
                    transform.map(Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"))));
    assertEquals(Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"), new Text("10|5"),
                    new DoubleWritable(Math.sqrt(160))),
                    transform.map(Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"),
                                    new Text("10|5"))));
}
 
Example #12
Source File: ArrowConverter.java    From deeplearning4j with Apache License 2.0
/**
 * Create a field given the input {@link ColumnType} and name.
 *
 * @param name the name of the field
 * @param columnType the column type to map to an Arrow type
 * @return the Arrow field for the given name and column type
 */
public static Field getFieldForColumn(String name,ColumnType columnType) {
    switch(columnType) {
        case Long: return field(name,new ArrowType.Int(64,false));
        case Integer: return field(name,new ArrowType.Int(32,false));
        case Double: return field(name,new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
        case Float: return field(name,new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
        case Boolean: return field(name, new ArrowType.Bool());
        case Categorical: return field(name,new ArrowType.Utf8());
        case Time: return field(name,new ArrowType.Date(DateUnit.MILLISECOND));
        case Bytes: return field(name,new ArrowType.Binary());
        case NDArray: return field(name,new ArrowType.Binary());
        case String: return field(name,new ArrowType.Utf8());

        default: throw new IllegalArgumentException("Column type invalid " + columnType);
    }
}
 
Example #13
Source File: TestGeoTransforms.java    From deeplearning4j with Apache License 2.0
@Test
public void testIPAddressToLocationTransform() throws Exception {
    Schema schema = new Schema.Builder().addColumnString("column").build();
    LocationType[] locationTypes = LocationType.values();
    String in = "81.2.69.160";
    String[] locations = {"London", "2643743", "Europe", "6255148", "United Kingdom", "2635167",
                    "51.5142:-0.0931", "", "England", "6269131"};    //Note: no postcode in this test DB for this record

    for (int i = 0; i < locationTypes.length; i++) {
        LocationType locationType = locationTypes[i];
        String location = locations[i];

        Transform transform = new IPAddressToLocationTransform("column", locationType);
        transform.setInputSchema(schema);

        Schema out = transform.transform(schema);

        assertEquals(1, out.getColumnMetaData().size());
        assertEquals(ColumnType.String, out.getMetaData(0).getColumnType());

        List<Writable> writables = transform.map(Collections.singletonList((Writable) new Text(in)));
        assertEquals(1, writables.size());
        assertEquals(location, writables.get(0).toString());
        //System.out.println(location);
    }
}
 
Example #14
Source File: AnalyzeSpark.java    From deeplearning4j with Apache License 2.0
public static DataAnalysis analyze(Schema schema, JavaRDD<List<Writable>> data, int maxHistogramBuckets) {
    data.cache();
    /*
     * TODO: Some care should be given to add histogramBuckets and histogramBucketCounts to this in the future
     */

    List<ColumnType> columnTypes = schema.getColumnTypes();
    List<AnalysisCounter> counters =
                    data.aggregate(null, new AnalysisAddFunction(schema), new AnalysisCombineFunction());

    double[][] minsMaxes = new double[counters.size()][2];
    List<ColumnAnalysis> list = DataVecAnalysisUtils.convertCounters(counters, minsMaxes, columnTypes);

    List<HistogramCounter> histogramCounters =
                    data.aggregate(null, new HistogramAddFunction(maxHistogramBuckets, schema, minsMaxes),
                                    new HistogramCombineFunction());

    DataVecAnalysisUtils.mergeCounters(list, histogramCounters);
    return new DataAnalysis(schema, list);
}
 
Example #15
Source File: Schema.java    From DataVec with Apache License 2.0
@Override
public String toString() {
    StringBuilder sb = new StringBuilder();
    int nCol = numColumns();

    int maxNameLength = 0;
    for (String s : getColumnNames()) {
        maxNameLength = Math.max(maxNameLength, s.length());
    }

    //Header:
    sb.append("Schema():\n");
    sb.append(String.format("%-6s", "idx")).append(String.format("%-" + (maxNameLength + 8) + "s", "name"))
                    .append(String.format("%-15s", "type")).append("meta data").append("\n");

    for (int i = 0; i < nCol; i++) {
        String colName = getName(i);
        ColumnType type = getType(i);
        ColumnMetaData meta = getMetaData(i);
        String paddedName = String.format("%-" + (maxNameLength + 8) + "s", "\"" + colName + "\"");
        sb.append(String.format("%-6d", i)).append(paddedName).append(String.format("%-15s", type)).append(meta)
                        .append("\n");
    }

    return sb.toString();
}
 
Example #16
Source File: TestGeoTransforms.java    From deeplearning4j with Apache License 2.0
@Test
public void testIPAddressToLocationTransform() throws Exception {
    Schema schema = new Schema.Builder().addColumnString("column").build();
    LocationType[] locationTypes = LocationType.values();
    String in = "81.2.69.160";
    String[] locations = {"London", "2643743", "Europe", "6255148", "United Kingdom", "2635167",
            "51.5142:-0.0931", "", "England", "6269131"};    //Note: no postcode in this test DB for this record

    for (int i = 0; i < locationTypes.length; i++) {
        LocationType locationType = locationTypes[i];
        String location = locations[i];

        Transform transform = new IPAddressToLocationTransform("column", locationType);
        transform.setInputSchema(schema);

        Schema out = transform.transform(schema);

        assertEquals(1, out.getColumnMetaData().size());
        assertEquals(ColumnType.String, out.getMetaData(0).getColumnType());

        List<Writable> writables = transform.map(Collections.singletonList((Writable) new Text(in)));
        assertEquals(1, writables.size());
        assertEquals(location, writables.get(0).toString());
        //System.out.println(location);
    }
}
 
Example #17
Source File: TestConditions.java    From deeplearning4j with Apache License 2.0
@Test
public void testStringCondition() {
    Schema schema = TestTransforms.getSchema(ColumnType.Integer);

    Condition condition = new StringColumnCondition("column", SequenceConditionMode.Or, ConditionOp.Equal, "value");
    condition.setInputSchema(schema);

    assertTrue(condition.condition(Collections.singletonList((Writable) new Text("value"))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new Text("not_value"))));

    Set<String> set = new HashSet<>();
    set.add("in set");
    set.add("also in set");
    condition = new StringColumnCondition("column", SequenceConditionMode.Or, ConditionOp.InSet, set);
    condition.setInputSchema(schema);
    assertTrue(condition.condition(Collections.singletonList((Writable) new Text("in set"))));
    assertTrue(condition.condition(Collections.singletonList((Writable) new Text("also in set"))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new Text("not in the set"))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new Text(":)"))));
}
 
Example #18
Source File: TestConditions.java    From DataVec with Apache License 2.0
@Test
public void testInvalidValueColumnConditionCondition() {
    Schema schema = TestTransforms.getSchema(ColumnType.Integer);

    Condition condition = new InvalidValueColumnCondition("column");
    condition.setInputSchema(schema);

    assertFalse(condition.condition(Collections.singletonList((Writable) new IntWritable(-1)))); //Not invalid -> condition does not apply
    assertFalse(condition.condition(Collections.singletonList((Writable) new IntWritable(-2))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new LongWritable(1000))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new Text("1000"))));
    assertTrue(condition.condition(Collections.singletonList((Writable) new Text("text"))));
    assertTrue(condition.condition(Collections.singletonList((Writable) new Text("NaN"))));
    assertTrue(condition.condition(
                    Collections.singletonList((Writable) new LongWritable(1L + (long) Integer.MAX_VALUE))));
    assertTrue(condition.condition(Collections.singletonList((Writable) new DoubleWritable(3.14159))));
}
 
Example #19
Source File: OverlappingTimeWindowFunction.java    From deeplearning4j with Apache License 2.0
@Override
public void setInputSchema(Schema schema) {
    if (!(schema instanceof SequenceSchema))
        throw new IllegalArgumentException(
                        "Invalid schema: OverlappingTimeWindowFunction can only operate on SequenceSchema");
    if (!schema.hasColumn(timeColumn))
        throw new IllegalStateException("Input schema does not have a column with name \"" + timeColumn + "\"");

    if (schema.getMetaData(timeColumn).getColumnType() != ColumnType.Time)
        throw new IllegalStateException("Invalid column: column \"" + timeColumn + "\" is not of type "
                        + ColumnType.Time + "; is " + schema.getMetaData(timeColumn).getColumnType());

    this.inputSchema = schema;

    timeZone = ((TimeMetaData) schema.getMetaData(timeColumn)).getTimeZone();
}
 
Example #20
Source File: TimeWindowFunction.java    From deeplearning4j with Apache License 2.0
@Override
public void setInputSchema(Schema schema) {
    if (!(schema instanceof SequenceSchema))
        throw new IllegalArgumentException(
                        "Invalid schema: TimeWindowFunction can " + "only operate on SequenceSchema");
    if (!schema.hasColumn(timeColumn))
        throw new IllegalStateException("Input schema does not have a column with name \"" + timeColumn + "\"");

    if (schema.getMetaData(timeColumn).getColumnType() != ColumnType.Time)
        throw new IllegalStateException("Invalid column: column \"" + timeColumn + "\" is not of type "
                        + ColumnType.Time + "; is " + schema.getMetaData(timeColumn).getColumnType());

    this.inputSchema = schema;

    timeZone = ((TimeMetaData) schema.getMetaData(timeColumn)).getTimeZone();
}
 
Example #21
Source File: OverlappingTimeWindowFunction.java    From DataVec with Apache License 2.0
@Override
public void setInputSchema(Schema schema) {
    if (!(schema instanceof SequenceSchema))
        throw new IllegalArgumentException(
                        "Invalid schema: OverlappingTimeWindowFunction can only operate on SequenceSchema");
    if (!schema.hasColumn(timeColumn))
        throw new IllegalStateException("Input schema does not have a column with name \"" + timeColumn + "\"");

    if (schema.getMetaData(timeColumn).getColumnType() != ColumnType.Time)
        throw new IllegalStateException("Invalid column: column \"" + timeColumn + "\" is not of type "
                        + ColumnType.Time + "; is " + schema.getMetaData(timeColumn).getColumnType());

    this.inputSchema = schema;

    timeZone = ((TimeMetaData) schema.getMetaData(timeColumn)).getTimeZone();
}
 
Example #22
Source File: TestConditions.java    From DataVec with Apache License 2.0
@Test
public void testStringCondition() {
    Schema schema = TestTransforms.getSchema(ColumnType.Integer);

    Condition condition = new StringColumnCondition("column", SequenceConditionMode.Or, ConditionOp.Equal, "value");
    condition.setInputSchema(schema);

    assertTrue(condition.condition(Collections.singletonList((Writable) new Text("value"))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new Text("not_value"))));

    Set<String> set = new HashSet<>();
    set.add("in set");
    set.add("also in set");
    condition = new StringColumnCondition("column", SequenceConditionMode.Or, ConditionOp.InSet, set);
    condition.setInputSchema(schema);
    assertTrue(condition.condition(Collections.singletonList((Writable) new Text("in set"))));
    assertTrue(condition.condition(Collections.singletonList((Writable) new Text("also in set"))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new Text("not in the set"))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new Text(":)"))));
}
 
Example #23
Source File: AggregableReductionUtils.java    From DataVec with Apache License 2.0
public static IAggregableReduceOp<Writable, List<Writable>> reduceColumn(List<ReduceOp> op, ColumnType type,
                boolean ignoreInvalid, ColumnMetaData metaData) {
    switch (type) {
        case Integer:
            return reduceIntColumn(op, ignoreInvalid, metaData);
        case Long:
            return reduceLongColumn(op, ignoreInvalid, metaData);
        case Float:
            return reduceFloatColumn(op, ignoreInvalid, metaData);
        case Double:
            return reduceDoubleColumn(op, ignoreInvalid, metaData);
        case String:
        case Categorical:
            return reduceStringOrCategoricalColumn(op, ignoreInvalid, metaData);
        case Time:
            return reduceTimeColumn(op, ignoreInvalid, metaData);
        case Bytes:
            return reduceBytesColumn(op, ignoreInvalid, metaData);
        default:
            throw new UnsupportedOperationException("Unknown or not implemented column type: " + type);
    }
}
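A minimal usage sketch for reduceColumn. It assumes the DataVec package layout (org.datavec.api.transform.reduce, org.datavec.api.transform.ops) and that a null ColumnMetaData is acceptable for simple operations such as Min/Max when ignoreInvalid is false, as Example #26 below also does.

import java.util.Arrays;
import java.util.List;

import org.datavec.api.transform.ColumnType;
import org.datavec.api.transform.ReduceOp;
import org.datavec.api.transform.ops.IAggregableReduceOp;
import org.datavec.api.transform.reduce.AggregableReductionUtils;
import org.datavec.api.writable.IntWritable;
import org.datavec.api.writable.Writable;

public class ReduceColumnDemo {
    public static void main(String[] args) {
        // Build a reducer that computes Min and Max over an Integer column
        IAggregableReduceOp<Writable, List<Writable>> reduce = AggregableReductionUtils
                .reduceColumn(Arrays.asList(ReduceOp.Min, ReduceOp.Max), ColumnType.Integer, false, null);

        // Feed the column values into the reducer, then collect the results
        for (int v : new int[]{3, -1, 7}) {
            reduce.accept(new IntWritable(v));
        }
        System.out.println(reduce.get());   // reduced values for Min and Max, e.g. [-1, 7]
    }
}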
 
Example #24
Source File: TestGeoTransforms.java    From deeplearning4j with Apache License 2.0
@Test
public void testCoordinatesDistanceTransform() throws Exception {
    Schema schema = new Schema.Builder().addColumnString("point").addColumnString("mean").addColumnString("stddev")
                    .build();

    Transform transform = new CoordinatesDistanceTransform("dist", "point", "mean", "stddev", "\\|");
    transform.setInputSchema(schema);

    Schema out = transform.transform(schema);
    assertEquals(4, out.numColumns());
    assertEquals(Arrays.asList("point", "mean", "stddev", "dist"), out.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.String, ColumnType.String, ColumnType.String, ColumnType.Double),
                    out.getColumnTypes());

    assertEquals(Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"), new DoubleWritable(5.0)),
                    transform.map(Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"))));
    assertEquals(Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"), new Text("10|5"),
                    new DoubleWritable(Math.sqrt(160))),
                    transform.map(Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"),
                                    new Text("10|5"))));
}
 
Example #25
Source File: SequenceSplitTimeSeparation.java    From DataVec with Apache License 2.0
@Override
public void setInputSchema(Schema inputSchema) {
    if (!inputSchema.hasColumn(timeColumn))
        throw new IllegalStateException(
                        "Invalid state: schema does not have column " + "with name \"" + timeColumn + "\"");
    if (inputSchema.getMetaData(timeColumn).getColumnType() != ColumnType.Time) {
        throw new IllegalStateException("Invalid input schema: schema column \"" + timeColumn
                        + "\" is not a time column." + " (Is type: "
                        + inputSchema.getMetaData(timeColumn).getColumnType() + ")");
    }

    this.timeColumnIdx = inputSchema.getIndexOfColumn(timeColumn);
    this.schema = inputSchema;
}
 
Example #26
Source File: SequenceMovingWindowReduceTransform.java    From deeplearning4j with Apache License 2.0
@Override
public List<List<Writable>> mapSequence(List<List<Writable>> sequence) {
    int colIdx = inputSchema.getIndexOfColumn(columnName);
    ColumnType columnType = inputSchema.getType(colIdx);
    List<List<Writable>> out = new ArrayList<>(sequence.size());
    LinkedList<Writable> window = new LinkedList<>();
    for (int i = 0; i < sequence.size(); i++) {
        Writable current = sequence.get(i).get(colIdx);
        window.addLast(current);
        if (window.size() > lookback) {
            window.removeFirst();
        }
        Writable reduced;
        if (window.size() < lookback && edgeCaseHandling == EdgeCaseHandling.SpecifiedValue) {
            reduced = edgeCaseValue;
        } else {
            IAggregableReduceOp<Writable, List<Writable>> reductionOp = AggregableReductionUtils
                            .reduceColumn(Collections.singletonList(op), columnType, false, null);
            for (Writable w : window) {
                reductionOp.accept(w);
            }
            reduced = reductionOp.get().get(0);
        }
        ArrayList<Writable> outThisStep = new ArrayList<>(sequence.get(i).size() + 1);
        outThisStep.addAll(sequence.get(i));
        outThisStep.add(reduced);
        out.add(outThisStep);
    }

    return out;
}
 
Example #27
Source File: TestConditions.java    From DataVec with Apache License 2.0
@Test
public void testNullWritableColumnCondition() {
    Schema schema = TestTransforms.getSchema(ColumnType.Time);

    Condition condition = new NullWritableColumnCondition("column");
    condition.setInputSchema(schema);

    assertTrue(condition.condition(Collections.singletonList((Writable) NullWritable.INSTANCE)));
    assertTrue(condition.condition(Collections.singletonList((Writable) new NullWritable())));
    assertFalse(condition.condition(Collections.singletonList((Writable) new IntWritable(0))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new Text("1"))));
}
 
Example #28
Source File: ArrowConverter.java    From deeplearning4j with Apache License 2.0
/**
 * Get an entry from a {@link FieldVector} based on an input {@link ColumnType}.
 *
 * @param item the row index of the entry to get from the column vector
 * @param from the column vector to read from
 * @param columnType the column type of the vector
 * @return the resulting writable
 */
public static Writable fromEntry(int item,FieldVector from,ColumnType columnType) {
    if(from.getValueCount() < item) {
        throw new IllegalArgumentException("Index specified greater than the number of items in the vector with length " + from.getValueCount());
    }

    switch(columnType) {
        case Integer:
            return new IntWritable(getIntFromFieldVector(item,from));
        case Long:
            return new LongWritable(getLongFromFieldVector(item,from));
        case Float:
            return new FloatWritable(getFloatFromFieldVector(item,from));
        case Double:
            return new DoubleWritable(getDoubleFromFieldVector(item,from));
        case Boolean:
            BitVector bitVector = (BitVector) from;
            return new BooleanWritable(bitVector.get(item) > 0);
        case Categorical:
            VarCharVector varCharVector = (VarCharVector) from;
            return new Text(varCharVector.get(item));
        case String:
            VarCharVector varCharVector2 = (VarCharVector) from;
            return new Text(varCharVector2.get(item));
        case Time:
            //TODO: need to look at closer
            return new LongWritable(getLongFromFieldVector(item,from));
        case NDArray:
            VarBinaryVector valueVector = (VarBinaryVector) from;
            byte[] bytes = valueVector.get(item);
            ByteBuffer direct = ByteBuffer.allocateDirect(bytes.length);
            direct.put(bytes);
            INDArray fromTensor = BinarySerde.toArray(direct);
            return new NDArrayWritable(fromTensor);
        default:
            throw new IllegalArgumentException("Illegal type " + from.getClass().getName());
    }
}
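A small, self-contained sketch of calling fromEntry on an Arrow IntVector. It assumes ArrowConverter is org.datavec.arrow.ArrowConverter (as in the deeplearning4j source tree) and that an Arrow version providing IntVector is on the classpath; the values are illustrative.

import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.datavec.api.transform.ColumnType;
import org.datavec.api.writable.Writable;
import org.datavec.arrow.ArrowConverter;

public class FromEntryDemo {
    public static void main(String[] args) {
        try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
             IntVector vector = new IntVector("column", allocator)) {
            // Populate a small integer column vector
            vector.allocateNew(3);
            vector.set(0, 7);
            vector.set(1, 11);
            vector.set(2, 13);
            vector.setValueCount(3);

            // Read entry 1 back as a DataVec Writable, using the Integer column type
            Writable w = ArrowConverter.fromEntry(1, vector, ColumnType.Integer);
            System.out.println(w.toInt());   // expected: 11
        }
    }
}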
 
Example #29
Source File: TestCalculateSortedRank.java    From deeplearning4j with Apache License 2.0
@Test
public void testCalculateSortedRank() {

    List<List<Writable>> data = new ArrayList<>();
    data.add(Arrays.asList((Writable) new Text("0"), new DoubleWritable(0.0)));
    data.add(Arrays.asList((Writable) new Text("3"), new DoubleWritable(0.3)));
    data.add(Arrays.asList((Writable) new Text("2"), new DoubleWritable(0.2)));
    data.add(Arrays.asList((Writable) new Text("1"), new DoubleWritable(0.1)));

    List<List<Writable>> rdd = data;

    Schema schema = new Schema.Builder().addColumnsString("TextCol").addColumnDouble("DoubleCol").build();

    TransformProcess tp = new TransformProcess.Builder(schema)
                    .calculateSortedRank("rank", "DoubleCol", new DoubleWritableComparator()).build();

    Schema outSchema = tp.getFinalSchema();
    assertEquals(3, outSchema.numColumns());
    assertEquals(Arrays.asList("TextCol", "DoubleCol", "rank"), outSchema.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.String, ColumnType.Double, ColumnType.Long), outSchema.getColumnTypes());

    List<List<Writable>> out = LocalTransformExecutor.execute(rdd, tp);

    List<List<Writable>> collected = out;
    assertEquals(4, collected.size());
    for (int i = 0; i < 4; i++)
        assertEquals(3, collected.get(i).size());

    for (List<Writable> example : collected) {
        int exampleNum = example.get(0).toInt();
        int rank = example.get(2).toInt();
        assertEquals(exampleNum, rank);
    }
}
 
Example #30
Source File: DataQualityAnalysis.java    From DataVec with Apache License 2.0
@Override
public String toString() {
    StringBuilder sb = new StringBuilder();
    int nCol = schema.numColumns();

    int maxNameLength = 0;
    for (String s : schema.getColumnNames()) {
        maxNameLength = Math.max(maxNameLength, s.length());
    }

    //Header:
    sb.append(String.format("%-6s", "idx")).append(String.format("%-" + (maxNameLength + 8) + "s", "name"))
                    .append(String.format("%-15s", "type")).append(String.format("%-10s", "quality"))
                    .append("details").append("\n");

    for (int i = 0; i < nCol; i++) {
        String colName = schema.getName(i);
        ColumnType type = schema.getType(i);
        ColumnQuality columnQuality = columnQualityList.get(i);
        boolean pass = columnQuality.getCountInvalid() == 0L && columnQuality.getCountMissing() == 0L;
        String paddedName = String.format("%-" + (maxNameLength + 8) + "s", "\"" + colName + "\"");
        sb.append(String.format("%-6d", i)).append(paddedName).append(String.format("%-15s", type))
                        .append(String.format("%-10s", (pass ? "ok" : "FAIL"))).append(columnQuality).append("\n");
    }

    return sb.toString();
}