org.datavec.api.transform.metadata.ColumnMetaData Java Examples

The following examples show how to use org.datavec.api.transform.metadata.ColumnMetaData. They are taken from the open source DataVec and Deeplearning4j projects; the source file and license are noted above each example.
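
ColumnMetaData describes a single column of a DataVec Schema: its name, its ColumnType, and any validity constraints on its values. As a quick orientation before the examples, here is a minimal sketch (not taken from either project) of how the class is typically obtained and queried. The schema, the column names "age" and "color", and the wrapper class are invented for illustration; the calls themselves (Schema.Builder, getColumnMetaData(), getMetaData(), isValid()) are the ones the examples below rely on.

import java.util.List;

import org.datavec.api.transform.metadata.ColumnMetaData;
import org.datavec.api.transform.schema.Schema;
import org.datavec.api.writable.IntWritable;

public class ColumnMetaDataSketch {
    public static void main(String[] args) {
        // Build a small schema; each column carries its own ColumnMetaData.
        Schema schema = new Schema.Builder()
                        .addColumnInteger("age")
                        .addColumnCategorical("color", "red", "green", "blue")
                        .build();

        // Inspect every column's metadata: name and column type.
        List<ColumnMetaData> meta = schema.getColumnMetaData();
        for (ColumnMetaData m : meta) {
            System.out.println(m.getName() + " -> " + m.getColumnType());
        }

        // Metadata can also validate individual Writable values, which is how
        // transforms such as FilterInvalidValues (Example #5) decide what to drop.
        ColumnMetaData ageMeta = schema.getMetaData("age");
        System.out.println("42 valid for \"age\"? " + ageMeta.isValid(new IntWritable(42)));
    }
}
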
Example #1
Source File: AggregableReductionUtils.java    From DataVec with Apache License 2.0
public static IAggregableReduceOp<Writable, List<Writable>> reduceColumn(List<ReduceOp> op, ColumnType type,
                boolean ignoreInvalid, ColumnMetaData metaData) {
    switch (type) {
        case Integer:
            return reduceIntColumn(op, ignoreInvalid, metaData);
        case Long:
            return reduceLongColumn(op, ignoreInvalid, metaData);
        case Float:
            return reduceFloatColumn(op, ignoreInvalid, metaData);
        case Double:
            return reduceDoubleColumn(op, ignoreInvalid, metaData);
        case String:
        case Categorical:
            return reduceStringOrCategoricalColumn(op, ignoreInvalid, metaData);
        case Time:
            return reduceTimeColumn(op, ignoreInvalid, metaData);
        case Bytes:
            return reduceBytesColumn(op, ignoreInvalid, metaData);
        default:
            throw new UnsupportedOperationException("Unknown or not implemented column type: " + type);
    }
}
 
Example #2
Source File: OverlappingTimeWindowFunction.java    From DataVec with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    if (!addWindowStartTimeColumn && !addWindowEndTimeColumn)
        return inputSchema;
    List<ColumnMetaData> newMeta = new ArrayList<>();

    newMeta.addAll(inputSchema.getColumnMetaData());

    if (addWindowStartTimeColumn) {
        newMeta.add(new TimeMetaData("windowStartTime"));
    }

    if (addWindowEndTimeColumn) {
        newMeta.add(new TimeMetaData("windowEndTime"));
    }

    return inputSchema.newSchema(newMeta);
}
 
Example #3
Source File: CategoricalToIntegerTransform.java    From deeplearning4j with Apache License 2.0
@Override
public void setInputSchema(Schema inputSchema) {
    super.setInputSchema(inputSchema);

    columnIdx = inputSchema.getIndexOfColumn(columnName);
    ColumnMetaData meta = inputSchema.getMetaData(columnName);
    if (!(meta instanceof CategoricalMetaData))
        throw new IllegalStateException("Cannot convert column \"" + columnName
                        + "\" from categorical to one-hot: column is not categorical (is: " + meta.getColumnType()
                        + ")");
    this.stateNames = ((CategoricalMetaData) meta).getStateNames();

    this.statesMap = new HashMap<>(stateNames.size());
    for (int i = 0; i < stateNames.size(); i++) {
        this.statesMap.put(stateNames.get(i), i);
    }
}
 
Example #4
Source File: OverlappingTimeWindowFunction.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    if (!addWindowStartTimeColumn && !addWindowEndTimeColumn)
        return inputSchema;
    List<ColumnMetaData> newMeta = new ArrayList<>();

    newMeta.addAll(inputSchema.getColumnMetaData());

    if (addWindowStartTimeColumn) {
        newMeta.add(new TimeMetaData("windowStartTime"));
    }

    if (addWindowEndTimeColumn) {
        newMeta.add(new TimeMetaData("windowEndTime"));
    }

    return inputSchema.newSchema(newMeta);
}
 
Example #5
Source File: FilterInvalidValues.java    From deeplearning4j with Apache License 2.0
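// Helper for FilterInvalidValues: returns true if the value in column i fails that
// column's ColumnMetaData.isValid check, i.e. the row should be filtered out.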
private boolean filterColumn(List<?> row, int i) {
    ColumnMetaData meta = schema.getMetaData(i);
    if (row.get(i) instanceof Float) {
        if (!meta.isValid(new FloatWritable((Float) row.get(i))))
            return true;
    } else if (row.get(i) instanceof Double) {
        if (!meta.isValid(new DoubleWritable((Double) row.get(i))))
            return true;
    } else if (row.get(i) instanceof String) {
        if (!meta.isValid(new Text((String) row.get(i))))
            return true;
    } else if (row.get(i) instanceof Integer) {
        if (!meta.isValid(new IntWritable((Integer) row.get(i))))
            return true;
    } else if (row.get(i) instanceof Long) {
        if (!meta.isValid(new LongWritable((Long) row.get(i))))
            return true;
    } else if (row.get(i) instanceof Boolean) {
        if (!meta.isValid(new BooleanWritable((Boolean) row.get(i))))
            return true;
    }
    return false;
}
 
Example #6
Source File: Join.java    From DataVec with Apache License 2.0
public Schema getOutputSchema() {
    if (leftSchema == null)
        throw new IllegalStateException("Left schema is not set (null)");
    if (rightSchema == null)
        throw new IllegalStateException("Right schema is not set (null)");
    if (joinColumnsLeft == null)
        throw new IllegalStateException("Left key columns are not set (null)");
    if (joinColumnsRight == null)
        throw new IllegalArgumentException("Right key columns are not set (null");

    //Approach here: take the left schema, plus the right schema (excluding the key columns from the right schema)
    List<ColumnMetaData> metaDataOut = new ArrayList<>(leftSchema.getColumnMetaData());

    Set<String> keySetRight = new HashSet<>();
    Collections.addAll(keySetRight, joinColumnsRight);

    for (ColumnMetaData rightMeta : rightSchema.getColumnMetaData()) {
        if (keySetRight.contains(rightMeta.getName()))
            continue;
        metaDataOut.add(rightMeta);
    }

    return leftSchema.newSchema(metaDataOut);
}
 
Example #7
Source File: RemoveAllColumnsExceptForTransform.java    From DataVec with Apache License 2.0
@Override
public Schema transform(Schema schema) {
    List<String> origNames = schema.getColumnNames();
    List<ColumnMetaData> origMeta = schema.getColumnMetaData();

    Set<String> keepSet = new HashSet<>();
    Collections.addAll(keepSet, columnsToKeep);

    List<ColumnMetaData> newMeta = new ArrayList<>(columnsToKeep.length);

    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> metaIter = origMeta.iterator();

    while (namesIter.hasNext()) {
        String n = namesIter.next();
        ColumnMetaData t = metaIter.next();
        if (keepSet.contains(n)) {
            newMeta.add(t);
        }
    }

    return schema.newSchema(newMeta);
}
 
Example #8
Source File: DoubleMathOpTransform.java    From DataVec with Apache License 2.0
@Override
public ColumnMetaData getNewColumnMetaData(String newColumnName, ColumnMetaData oldColumnType) {
    if (!(oldColumnType instanceof DoubleMetaData))
        throw new IllegalStateException("Column is not an double column");
    DoubleMetaData meta = (DoubleMetaData) oldColumnType;
    Double minValue = meta.getMinAllowedValue();
    Double maxValue = meta.getMaxAllowedValue();
    if (minValue != null)
        minValue = doOp(minValue);
    if (maxValue != null)
        maxValue = doOp(maxValue);
    if (minValue != null && maxValue != null && minValue > maxValue) {
        //Consider rsub 1, with original min/max of 0 and 1: (1-0) -> 1 and (1-1) -> 0
        //Or multiplication by -1: (0 to 1) -> (-1 to 0)
        //Need to swap min/max here...
        Double temp = minValue;
        minValue = maxValue;
        maxValue = temp;
    }
    return new DoubleMetaData(newColumnName, minValue, maxValue);
}
 
Example #9
Source File: BaseColumnTransform.java    From DataVec with Apache License 2.0
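// Rebuilds the schema, swapping in new metadata (via getNewColumnMetaData) for the single
// column at columnNumber and keeping all other columns unchanged.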
@Override
public Schema transform(Schema schema) {
    if (columnNumber == -1)
        throw new IllegalStateException("columnNumber == -1 -> setInputSchema not called?");
    List<ColumnMetaData> oldMeta = schema.getColumnMetaData();
    List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size());

    Iterator<ColumnMetaData> typesIter = oldMeta.iterator();

    int i = 0;
    while (typesIter.hasNext()) {
        ColumnMetaData t = typesIter.next();
        if (i++ == columnNumber) {
            newMeta.add(getNewColumnMetaData(t.getName(), t));
        } else {
            newMeta.add(t);
        }
    }

    return schema.newSchema(newMeta);
}
 
Example #10
Source File: DuplicateColumnsTransform.java    From deeplearning4j with Apache License 2.0
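// For each column listed in columnsToDuplicateSet, clone its metadata, rename the clone to
// the corresponding entry in newColumnNames, and insert it directly after the original column.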
@Override
public Schema transform(Schema inputSchema) {
    List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size() + newColumnNames.size());

    List<String> oldNames = inputSchema.getColumnNames();

    int dupCount = 0;
    for (int i = 0; i < oldMeta.size(); i++) {
        String current = oldNames.get(i);
        newMeta.add(oldMeta.get(i));

        if (columnsToDuplicateSet.contains(current)) {
            //Duplicate the current columnName, and place it after...
            String dupName = newColumnNames.get(dupCount);
            ColumnMetaData m = oldMeta.get(i).clone();
            m.setName(dupName);
            newMeta.add(m);
            dupCount++;
        }
    }

    return inputSchema.newSchema(newMeta);
}
 
Example #11
Source File: AggregableReductionUtils.java    From deeplearning4j with Apache License 2.0
public static IAggregableReduceOp<Writable, List<Writable>> reduceBytesColumn(List<ReduceOp> lop,
                boolean ignoreInvalid, ColumnMetaData metaData) {

    List<IAggregableReduceOp<Byte, Writable>> res = new ArrayList<>(lop.size());
    for (int i = 0; i < lop.size(); i++) {
        switch (lop.get(i)) {
            case TakeFirst:
                res.add(new AggregatorImpls.AggregableFirst<Byte>());
                break;
            case TakeLast:
                res.add(new AggregatorImpls.AggregableLast<Byte>());
                break;
            default:
                throw new UnsupportedOperationException("Cannot execute op \"" + lop.get(i) + "\" on Bytes column "
                                + "(can only perform TakeFirst and TakeLast ops on bytes columns)");
        }
    }
    IAggregableReduceOp<Writable, List<Writable>> thisOp = new ByteWritableOp<>(new AggregableMultiOp<>(res));
    if (ignoreInvalid)
        return new AggregableCheckingOp<>(thisOp, metaData);
    else
        return thisOp;
}
 
Example #12
Source File: AggregableReductionUtils.java    From deeplearning4j with Apache License 2.0
public static IAggregableReduceOp<Writable, List<Writable>> reduceColumn(List<ReduceOp> op, ColumnType type,
                boolean ignoreInvalid, ColumnMetaData metaData) {
    switch (type) {
        case Integer:
            return reduceIntColumn(op, ignoreInvalid, metaData);
        case Long:
            return reduceLongColumn(op, ignoreInvalid, metaData);
        case Float:
            return reduceFloatColumn(op, ignoreInvalid, metaData);
        case Double:
            return reduceDoubleColumn(op, ignoreInvalid, metaData);
        case String:
        case Categorical:
            return reduceStringOrCategoricalColumn(op, ignoreInvalid, metaData);
        case Time:
            return reduceTimeColumn(op, ignoreInvalid, metaData);
        case Bytes:
            return reduceBytesColumn(op, ignoreInvalid, metaData);
        default:
            throw new UnsupportedOperationException("Unknown or not implemented column type: " + type);
    }
}
 
Example #13
Source File: LongMathOpTransform.java    From DataVec with Apache License 2.0
@Override
public ColumnMetaData getNewColumnMetaData(String newName, ColumnMetaData oldColumnType) {
    if (!(oldColumnType instanceof LongMetaData))
        throw new IllegalStateException("Column is not an Long column");
    LongMetaData meta = (LongMetaData) oldColumnType;
    Long minValue = meta.getMinAllowedValue();
    Long maxValue = meta.getMaxAllowedValue();
    if (minValue != null)
        minValue = doOp(minValue);
    if (maxValue != null)
        maxValue = doOp(maxValue);
    if (minValue != null && maxValue != null && minValue > maxValue) {
        //Consider rsub 1, with original min/max of 0 and 1: (1-0) -> 1 and (1-1) -> 0
        //Or multiplication by -1: (0 to 1) -> (-1 to 0)
        //Need to swap min/max here...
        Long temp = minValue;
        minValue = maxValue;
        maxValue = temp;
    }
    return new LongMetaData(newName, minValue, maxValue);
}
 
Example #14
Source File: AggregableReductionUtils.java    From DataVec with Apache License 2.0
public static IAggregableReduceOp<Writable, List<Writable>> reduceBytesColumn(List<ReduceOp> lop,
                boolean ignoreInvalid, ColumnMetaData metaData) {

    List<IAggregableReduceOp<Byte, Writable>> res = new ArrayList<>(lop.size());
    for (int i = 0; i < lop.size(); i++) {
        switch (lop.get(i)) {
            case TakeFirst:
                res.add(new AggregatorImpls.AggregableFirst<Byte>());
                break;
            case TakeLast:
                res.add(new AggregatorImpls.AggregableLast<Byte>());
                break;
            default:
                throw new UnsupportedOperationException("Cannot execute op \"" + lop.get(i) + "\" on Bytes column "
                                + "(can only perform TakeFirst and TakeLast ops on bytes columns)");
        }
    }
    IAggregableReduceOp<Writable, List<Writable>> thisOp = new ByteWritableOp<>(new AggregableMultiOp<>(res));
    if (ignoreInvalid)
        return new AggregableCheckingOp<>(thisOp, metaData);
    else
        return thisOp;
}
 
Example #15
Source File: RemoveAllColumnsExceptForTransform.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema schema) {
    List<String> origNames = schema.getColumnNames();
    List<ColumnMetaData> origMeta = schema.getColumnMetaData();

    Set<String> keepSet = new HashSet<>();
    Collections.addAll(keepSet, columnsToKeep);

    List<ColumnMetaData> newMeta = new ArrayList<>(columnsToKeep.length);

    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> metaIter = origMeta.iterator();

    while (namesIter.hasNext()) {
        String n = namesIter.next();
        ColumnMetaData t = metaIter.next();
        if (keepSet.contains(n)) {
            newMeta.add(t);
        }
    }

    return schema.newSchema(newMeta);
}
 
Example #16
Source File: StringReducer.java    From DataVec with Apache License 2.0
private static ColumnMetaData getMetaForColumn(StringReduceOp op, String name, ColumnMetaData inMeta) {
    inMeta = inMeta.clone();
    switch (op) {
        case PREPEND:
            inMeta.setName("prepend(" + name + ")");
            return inMeta;
        case APPEND:
            inMeta.setName("append(" + name + ")");
            return inMeta;
        case REPLACE:
            inMeta.setName("replace(" + name + ")");
            return inMeta;
        case MERGE:
            inMeta.setName("merge(" + name + ")");
            return inMeta;
        default:
            throw new UnsupportedOperationException("Unknown or not implemented op: " + op);
    }
}
 
Example #17
Source File: LongMathOpTransform.java    From deeplearning4j with Apache License 2.0
@Override
public ColumnMetaData getNewColumnMetaData(String newName, ColumnMetaData oldColumnType) {
    if (!(oldColumnType instanceof LongMetaData))
        throw new IllegalStateException("Column is not an Long column");
    LongMetaData meta = (LongMetaData) oldColumnType;
    Long minValue = meta.getMinAllowedValue();
    Long maxValue = meta.getMaxAllowedValue();
    if (minValue != null)
        minValue = doOp(minValue);
    if (maxValue != null)
        maxValue = doOp(maxValue);
    if (minValue != null && maxValue != null && minValue > maxValue) {
        //Consider rsub 1, with original min/max of 0 and 1: (1-0) -> 1 and (1-1) -> 0
        //Or multiplication by -1: (0 to 1) -> (-1 to 0)
        //Need to swap min/max here...
        Long temp = minValue;
        minValue = maxValue;
        maxValue = temp;
    }
    return new LongMetaData(newName, minValue, maxValue);
}
 
Example #18
Source File: CategoricalToOneHotTransform.java    From DataVec with Apache License 2.0
@Override
public void setInputSchema(Schema inputSchema) {
    super.setInputSchema(inputSchema);

    columnIdx = inputSchema.getIndexOfColumn(columnName);
    ColumnMetaData meta = inputSchema.getMetaData(columnName);
    if (!(meta instanceof CategoricalMetaData))
        throw new IllegalStateException("Cannot convert column \"" + columnName
                        + "\" from categorical to one-hot: column is not categorical (is: " + meta.getColumnType()
                        + ")");
    this.stateNames = ((CategoricalMetaData) meta).getStateNames();

    this.statesMap = new HashMap<>(stateNames.size());
    for (int i = 0; i < stateNames.size(); i++) {
        this.statesMap.put(stateNames.get(i), i);
    }
}
 
Example #19
Source File: CategoricalToOneHotTransform.java    From deeplearning4j with Apache License 2.0
@Override
public void setInputSchema(Schema inputSchema) {
    super.setInputSchema(inputSchema);

    columnIdx = inputSchema.getIndexOfColumn(columnName);
    ColumnMetaData meta = inputSchema.getMetaData(columnName);
    if (!(meta instanceof CategoricalMetaData))
        throw new IllegalStateException("Cannot convert column \"" + columnName
                        + "\" from categorical to one-hot: column is not categorical (is: " + meta.getColumnType()
                        + ")");
    this.stateNames = ((CategoricalMetaData) meta).getStateNames();

    this.statesMap = new HashMap<>(stateNames.size());
    for (int i = 0; i < stateNames.size(); i++) {
        this.statesMap.put(stateNames.get(i), i);
    }
}
 
Example #20
Source File: FloatMathOpTransform.java    From deeplearning4j with Apache License 2.0
@Override
public ColumnMetaData getNewColumnMetaData(String newColumnName, ColumnMetaData oldColumnType) {
    if (!(oldColumnType instanceof FloatMetaData))
        throw new IllegalStateException("Column is not an float column");
    FloatMetaData meta = (FloatMetaData) oldColumnType;
    Float minValue = meta.getMinAllowedValue();
    Float maxValue = meta.getMaxAllowedValue();
    if (minValue != null)
        minValue = doOp(minValue);
    if (maxValue != null)
        maxValue = doOp(maxValue);
    if (minValue != null && maxValue != null && minValue > maxValue) {
        //Consider rsub 1, with original min/max of 0 and 1: (1-0) -> 1 and (1-1) -> 0
        //Or multiplication by -1: (0 to 1) -> (-1 to 0)
        //Need to swap min/max here...
        Float temp = minValue;
        minValue = maxValue;
        maxValue = temp;
    }
    return new FloatMetaData(newColumnName, minValue, maxValue);
}
 
Example #21
Source File: BaseColumnsMathOpTransform.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    for (String name : columns) {
        if (!inputSchema.hasColumn(name))
            throw new IllegalStateException("Input schema does not have column with name \"" + name + "\"");
    }

    List<ColumnMetaData> newMeta = new ArrayList<>(inputSchema.getColumnMetaData());

    newMeta.add(derivedColumnMetaData(newColumnName, inputSchema));

    return inputSchema.newSchema(newMeta);
}
 
Example #22
Source File: BaseDoubleTransform.java    From DataVec with Apache License 2.0
@Override
public ColumnMetaData getNewColumnMetaData(String newColumnName, ColumnMetaData oldColumnMeta) {
    if (oldColumnMeta instanceof DoubleMetaData) {
        ColumnMetaData meta = oldColumnMeta.clone();
        meta.setName(newColumnName);
        return meta;
    } else
        return new DoubleMetaData(newColumnName);
}
 
Example #23
Source File: FirstDigitTransform.java    From deeplearning4j with Apache License 2.0
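// Builds the output schema: the first-digit column is categorical with states "0"-"9" (plus an
// "other" state in INCLUDE_OTHER_CATEGORY mode). It replaces the input column when outputColumn
// equals inputColumn; otherwise it is inserted directly after the input column.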
@Override
public Schema transform(Schema inputSchema) {
    List<String> origNames = inputSchema.getColumnNames();
    List<ColumnMetaData> origMeta = inputSchema.getColumnMetaData();

    Preconditions.checkState(origNames.contains(inputColumn), "Input column with name \"%s\" not found in schema", inputColumn);
    Preconditions.checkState(inputColumn.equals(outputColumn) || !origNames.contains(outputColumn),
            "Output column with name \"%s\" already exists in schema (only allowable if input column == output column)", outputColumn);

    List<ColumnMetaData> outMeta = new ArrayList<>(origNames.size()+1);
    for( int i=0; i<origNames.size(); i++ ){
        String s = origNames.get(i);
        if(s.equals(inputColumn)){
            if(!outputColumn.equals(inputColumn)){
                outMeta.add(origMeta.get(i));
            }

            List<String> l = Collections.unmodifiableList(
                    mode == Mode.INCLUDE_OTHER_CATEGORY ?
                            Arrays.asList("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", OTHER_CATEGORY) :
                            Arrays.asList("0", "1", "2", "3", "4", "5", "6", "7", "8", "9"));

            CategoricalMetaData cm = new CategoricalMetaData(outputColumn, l);

            outMeta.add(cm);
        } else {
            outMeta.add(origMeta.get(i));
        }
    }

    return inputSchema.newSchema(outMeta);
}
 
Example #24
Source File: ReduceSequenceByWindowTransform.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    if (inputSchema != null && !(inputSchema instanceof SequenceSchema)) {
        throw new IllegalArgumentException("Invalid input: input schema must be a SequenceSchema");
    }

    //Some window functions may make changes to the schema (adding window start/end times, for example)
    inputSchema = windowFunction.transform(inputSchema);

    //Approach here: The reducer gives us a schema for one time step -> simply convert this to a sequence schema...
    Schema oneStepSchema = reducer.transform(inputSchema);
    List<ColumnMetaData> meta = oneStepSchema.getColumnMetaData();

    return new SequenceSchema(meta);
}
 
Example #25
Source File: StringReducer.java    From DataVec with Apache License 2.0
/**
 * Get the output schema, given the input schema
 */
@Override
public Schema transform(Schema schema) {
    int nCols = schema.numColumns();
    List<ColumnMetaData> meta = schema.getColumnMetaData();
    List<ColumnMetaData> newMeta = new ArrayList<>(nCols);
    newMeta.addAll(meta);
    newMeta.add(new StringMetaData(outputColumnName));
    return schema.newSchema(newMeta);
}
 
Example #26
Source File: RemoveColumnsTransform.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema schema) {
    int nToRemove = columnsToRemove.length;
    int newNumColumns = schema.numColumns() - nToRemove;
    if (newNumColumns <= 0)
        throw new IllegalStateException("Number of columns after executing operation is " + newNumColumns
                        + " (is <= 0). " + "origColumns = " + schema.getColumnNames() + ", toRemove = "
                        + Arrays.toString(columnsToRemove));

    List<String> origNames = schema.getColumnNames();
    List<ColumnMetaData> origMeta = schema.getColumnMetaData();

    Set<String> set = new HashSet<>();
    Collections.addAll(set, columnsToRemove);

    List<ColumnMetaData> newMeta = new ArrayList<>(newNumColumns);

    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> metaIter = origMeta.iterator();

    while (namesIter.hasNext()) {
        String n = namesIter.next();
        ColumnMetaData t = metaIter.next();
        if (!set.contains(n)) {
            newMeta.add(t);
        }
    }

    return schema.newSchema(newMeta);
}
 
Example #27
Source File: BaseColumnsMathOpTransform.java    From DataVec with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    for (String name : columns) {
        if (!inputSchema.hasColumn(name))
            throw new IllegalStateException("Input schema does not have column with name \"" + name + "\"");
    }

    List<ColumnMetaData> newMeta = new ArrayList<>(inputSchema.getColumnMetaData());

    newMeta.add(derivedColumnMetaData(newColumnName, inputSchema));

    return inputSchema.newSchema(newMeta);
}
 
Example #28
Source File: ConcatenateStringColumns.java    From DataVec with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    for (String s : columnsToConcatenate) {
        if (!inputSchema.hasColumn(s)) {
            throw new IllegalStateException("Input schema does not contain column with name \"" + s + "\"");
        }
    }

    List<ColumnMetaData> outMeta = new ArrayList<>();
    outMeta.addAll(inputSchema.getColumnMetaData());

    ColumnMetaData newColMeta = ColumnType.String.newColumnMetaData(newColumnName);
    outMeta.add(newColMeta);
    return inputSchema.newSchema(outMeta);
}
 
Example #29
Source File: AggregableReductionUtils.java    From deeplearning4j with Apache License 2.0
public static IAggregableReduceOp<Writable, List<Writable>> reduceStringOrCategoricalColumn(List<ReduceOp> lop,
                boolean ignoreInvalid, ColumnMetaData metaData) {

    List<IAggregableReduceOp<String, Writable>> res = new ArrayList<>(lop.size());
    for (int i = 0; i < lop.size(); i++) {
        switch (lop.get(i)) {
            case Count:
                res.add(new AggregatorImpls.AggregableCount<String>());
                break;
            case CountUnique:
                res.add(new AggregatorImpls.AggregableCountUnique<String>());
                break;
            case TakeFirst:
                res.add(new AggregatorImpls.AggregableFirst<String>());
                break;
            case TakeLast:
                res.add(new AggregatorImpls.AggregableLast<String>());
                break;
            case Append:
                res.add(new StringAggregatorImpls.AggregableStringAppend());
                break;
            case Prepend:
                res.add(new StringAggregatorImpls.AggregableStringPrepend());
                break;
            default:
                throw new UnsupportedOperationException("Cannot execute op \"" + lop.get(i)
                                + "\" on String/Categorical column "
                                + "(can only perform Append, Prepend, Count, CountUnique, TakeFirst and TakeLast ops on categorical columns)");
        }
    }

    IAggregableReduceOp<Writable, List<Writable>> thisOp = new StringWritableOp<>(new AggregableMultiOp<>(res));
    if (ignoreInvalid)
        return new AggregableCheckingOp<>(thisOp, metaData);
    else
        return thisOp;
}
 
Example #30
Source File: SequenceSchema.java    From DataVec with Apache License 2.0
@Override
public String toString() {
    StringBuilder sb = new StringBuilder();
    int nCol = numColumns();

    int maxNameLength = 0;
    for (String s : getColumnNames()) {
        maxNameLength = Math.max(maxNameLength, s.length());
    }

    //Header:
    sb.append("SequenceSchema(");

    if (minSequenceLength != null)
        sb.append("minSequenceLength=").append(minSequenceLength);
    if (maxSequenceLength != null) {
        if (minSequenceLength != null)
            sb.append(",");
        sb.append("maxSequenceLength=").append(maxSequenceLength);
    }

    sb.append(")\n");
    sb.append(String.format("%-6s", "idx")).append(String.format("%-" + (maxNameLength + 8) + "s", "name"))
                    .append(String.format("%-15s", "type")).append("meta data").append("\n");

    for (int i = 0; i < nCol; i++) {
        String colName = getName(i);
        ColumnType type = getType(i);
        ColumnMetaData meta = getMetaData(i);
        String paddedName = String.format("%-" + (maxNameLength + 8) + "s", "\"" + colName + "\"");
        sb.append(String.format("%-6d", i)).append(paddedName).append(String.format("%-15s", type)).append(meta)
                        .append("\n");
    }

    return sb.toString();
}