Java Code Examples for org.datavec.api.transform.schema.Schema#getColumnMetaData()

The following examples show how to use org.datavec.api.transform.schema.Schema#getColumnMetaData(). Each example is taken from an open-source project; the source file and license are listed above each snippet, so you can refer back to the original project for the full class and surrounding context.
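Before the project examples, here is a minimal, self-contained sketch of the method itself. It is not taken from any of the projects below; the column names are hypothetical, and it assumes only the standard DataVec Schema.Builder and ColumnMetaData APIs. It shows what getColumnMetaData() returns, and the copy-modify-newSchema() pattern that most of the transforms below follow.

import java.util.ArrayList;
import java.util.List;

import org.datavec.api.transform.metadata.ColumnMetaData;
import org.datavec.api.transform.metadata.StringMetaData;
import org.datavec.api.transform.schema.Schema;

public class GetColumnMetaDataSketch {
    public static void main(String[] args) {
        //Hypothetical schema, for illustration only
        Schema schema = new Schema.Builder()
                .addColumnString("userId")
                .addColumnInteger("age")
                .addColumnDouble("score")
                .build();

        //getColumnMetaData() returns one ColumnMetaData per column, in column order
        List<ColumnMetaData> meta = schema.getColumnMetaData();
        for (ColumnMetaData m : meta) {
            System.out.println(m.getName() + " -> " + m.getColumnType());
        }

        //The pattern used by most transforms below: copy the metadata list,
        //modify it, then derive a new Schema of the same kind via newSchema(...)
        List<ColumnMetaData> newMeta = new ArrayList<>(meta);
        newMeta.add(new StringMetaData("comment"));
        Schema withExtraColumn = schema.newSchema(newMeta);
        System.out.println(withExtraColumn.getColumnNames());
    }
}
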
Example 1
Source File: DuplicateColumnsTransform.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size() + newColumnNames.size());

    List<String> oldNames = inputSchema.getColumnNames();

    int dupCount = 0;
    for (int i = 0; i < oldMeta.size(); i++) {
        String current = oldNames.get(i);
        newMeta.add(oldMeta.get(i));

        if (columnsToDuplicateSet.contains(current)) {
            //Duplicate the current columnName, and place it after...
            String dupName = newColumnNames.get(dupCount);
            ColumnMetaData m = oldMeta.get(i).clone();
            m.setName(dupName);
            newMeta.add(m);
            dupCount++;
        }
    }

    return inputSchema.newSchema(newMeta);
}
 
Example 2
Source File: RemoveAllColumnsExceptForTransform.java    From DataVec with Apache License 2.0
@Override
public Schema transform(Schema schema) {
    List<String> origNames = schema.getColumnNames();
    List<ColumnMetaData> origMeta = schema.getColumnMetaData();

    Set<String> keepSet = new HashSet<>();
    Collections.addAll(keepSet, columnsToKeep);


    List<ColumnMetaData> newMeta = new ArrayList<>(columnsToKeep.length);

    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> metaIter = origMeta.iterator();

    //Keep metadata only for the columns named in columnsToKeep
    while (namesIter.hasNext()) {
        String n = namesIter.next();
        ColumnMetaData t = metaIter.next();
        if (keepSet.contains(n)) {
            newMeta.add(t);
        }
    }

    return schema.newSchema(newMeta);
}
 
Example 3
Source File: BaseSequenceExpansionTransform.java    From DataVec with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    //Same schema *except* for the expanded columns

    List<ColumnMetaData> meta = new ArrayList<>(inputSchema.numColumns());

    List<ColumnMetaData> oldMetaToExpand = new ArrayList<>();
    for(String s : requiredColumns){
        oldMetaToExpand.add(inputSchema.getMetaData(s));
    }
    List<ColumnMetaData> newMetaToExpand = expandedColumnMetaDatas(oldMetaToExpand, expandedColumnNames);

    int modColumnIdx = 0;
    for(ColumnMetaData m : inputSchema.getColumnMetaData()){

        if(requiredColumns.contains(m.getName())){
            //Possibly changed column (expanded)
            meta.add(newMetaToExpand.get(modColumnIdx++));
        } else {
            //Unmodified column
            meta.add(m);
        }
    }

    return inputSchema.newSchema(meta);
}
 
Example 4
Source File: BaseSequenceExpansionTransform.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    //Same schema *except* for the expanded columns

    List<ColumnMetaData> meta = new ArrayList<>(inputSchema.numColumns());

    List<ColumnMetaData> oldMetaToExpand = new ArrayList<>();
    for(String s : requiredColumns){
        oldMetaToExpand.add(inputSchema.getMetaData(s));
    }
    List<ColumnMetaData> newMetaToExpand = expandedColumnMetaDatas(oldMetaToExpand, expandedColumnNames);

    int modColumnIdx = 0;
    for(ColumnMetaData m : inputSchema.getColumnMetaData()){

        if(requiredColumns.contains(m.getName())){
            //Possibly changed column (expanded)
            meta.add(newMetaToExpand.get(modColumnIdx++));
        } else {
            //Unmodified column
            meta.add(m);
        }
    }

    return inputSchema.newSchema(meta);
}
 
Example 5
Source File: CalculateSortedRank.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    if (inputSchema instanceof SequenceSchema)
        throw new IllegalStateException("Calculating sorted rank on sequences: not yet supported");

    List<ColumnMetaData> origMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> newMeta = new ArrayList<>(origMeta);

    //Append the rank column: a non-negative long with no upper bound
    newMeta.add(new LongMetaData(newColumnName, 0L, null));

    return inputSchema.newSchema(newMeta);
}
 
Example 6
Source File: ReduceSequenceTransform.java    From DataVec with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    if (inputSchema != null && !(inputSchema instanceof SequenceSchema)) {
        throw new IllegalArgumentException("Invalid input: input schema must be a SequenceSchema");
    }

    //Approach here: The reducer gives us a schema for one time step -> simply convert this to a sequence schema...
    Schema oneStepSchema = reducer.transform(inputSchema);
    List<ColumnMetaData> meta = oneStepSchema.getColumnMetaData();

    return new SequenceSchema(meta);
}
 
Example 7
Source File: ReduceSequenceByWindowTransform.java    From DataVec with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    if (inputSchema != null && !(inputSchema instanceof SequenceSchema)) {
        throw new IllegalArgumentException("Invalid input: input schema must be a SequenceSchema");
    }

    //Some window functions may make changes to the schema (adding window start/end times, for example)
    inputSchema = windowFunction.transform(inputSchema);

    //Approach here: The reducer gives us a schema for one time step -> simply convert this to a sequence schema...
    Schema oneStepSchema = reducer.transform(inputSchema);
    List<ColumnMetaData> meta = oneStepSchema.getColumnMetaData();

    return new SequenceSchema(meta);
}
 
Example 8
Source File: StringReducer.java    From DataVec with Apache License 2.0
/**
 * Get the output schema, given the input schema
 */
@Override
public Schema transform(Schema schema) {
    int nCols = schema.numColumns();
    List<ColumnMetaData> meta = schema.getColumnMetaData();
    List<ColumnMetaData> newMeta = new ArrayList<>(nCols);
    newMeta.addAll(meta);
    newMeta.add(new StringMetaData(outputColumnName));
    return schema.newSchema(newMeta);
}
 
Example 9
Source File: CategoricalToOneHotTransform.java    From DataVec with Apache License 2.0
@Override
public Schema transform(Schema schema) {
    List<String> origNames = schema.getColumnNames();
    List<ColumnMetaData> origMeta = schema.getColumnMetaData();

    int i = 0;
    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> typesIter = origMeta.iterator();

    List<ColumnMetaData> newMeta = new ArrayList<>(schema.numColumns());

    while (namesIter.hasNext()) {
        String s = namesIter.next();
        ColumnMetaData t = typesIter.next();

        if (i++ == columnIdx) {
            //Convert this to one-hot:
            for (String stateName : stateNames) {
                String newName = s + "[" + stateName + "]";
                newMeta.add(new IntegerMetaData(newName, 0, 1));
            }
        } else {
            newMeta.add(t);
        }
    }

    return schema.newSchema(newMeta);
}
 
Example 10
Source File: FirstDigitTransform.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    List<String> origNames = inputSchema.getColumnNames();
    List<ColumnMetaData> origMeta = inputSchema.getColumnMetaData();

    Preconditions.checkState(origNames.contains(inputColumn), "Input column with name \"%s\" not found in schema", inputColumn);
    Preconditions.checkState(inputColumn.equals(outputColumn) || !origNames.contains(outputColumn),
            "Output column with name \"%s\" already exists in schema (only allowable if input column == output column)", outputColumn);

    List<ColumnMetaData> outMeta = new ArrayList<>(origNames.size()+1);
    for( int i=0; i<origNames.size(); i++ ){
        String s = origNames.get(i);
        if(s.equals(inputColumn)){
            if(!outputColumn.equals(inputColumn)){
                outMeta.add(origMeta.get(i));
            }

            List<String> l = Collections.unmodifiableList(
                    mode == Mode.INCLUDE_OTHER_CATEGORY ?
                            Arrays.asList("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", OTHER_CATEGORY) :
                            Arrays.asList("0", "1", "2", "3", "4", "5", "6", "7", "8", "9"));

            CategoricalMetaData cm = new CategoricalMetaData(outputColumn, l);

            outMeta.add(cm);
        } else {
            outMeta.add(origMeta.get(i));
        }
    }

    return inputSchema.newSchema(outMeta);
}
 
Example 11
Source File: StringReducer.java    From deeplearning4j with Apache License 2.0
/**
 * Get the output schema, given the input schema
 */
@Override
public Schema transform(Schema schema) {
    int nCols = schema.numColumns();
    List<ColumnMetaData> meta = schema.getColumnMetaData();
    List<ColumnMetaData> newMeta = new ArrayList<>(nCols);
    newMeta.addAll(meta);
    newMeta.add(new StringMetaData(outputColumnName));
    return schema.newSchema(newMeta);
}
 
Example 12
Source File: DeriveColumnsFromTimeTransform.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size() + derivedColumns.size());

    List<String> oldNames = inputSchema.getColumnNames();

    for (int i = 0; i < oldMeta.size(); i++) {
        String current = oldNames.get(i);
        newMeta.add(oldMeta.get(i));

        if (insertAfter.equals(current)) {
            //Insert the derived columns here
            for (DerivedColumn d : derivedColumns) {
                switch (d.columnType) {
                    case String:
                        newMeta.add(new StringMetaData(d.columnName));
                        break;
                    case Integer:
                        newMeta.add(new IntegerMetaData(d.columnName)); //TODO: ranges... if it's a day, we know it must be 1 to 31, etc...
                        break;
                    default:
                        throw new IllegalStateException("Unexpected column type: " + d.columnType);
                }
            }
        }
    }

    return inputSchema.newSchema(newMeta);
}
 
Example 13
Source File: DeriveColumnsFromTimeTransform.java    From DataVec with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size() + derivedColumns.size());

    List<String> oldNames = inputSchema.getColumnNames();

    for (int i = 0; i < oldMeta.size(); i++) {
        String current = oldNames.get(i);
        newMeta.add(oldMeta.get(i));

        if (insertAfter.equals(current)) {
            //Insert the derived columns here
            for (DerivedColumn d : derivedColumns) {
                switch (d.columnType) {
                    case String:
                        newMeta.add(new StringMetaData(d.columnName));
                        break;
                    case Integer:
                        newMeta.add(new IntegerMetaData(d.columnName)); //TODO: ranges... if it's a day, we know it must be 1 to 31, etc...
                        break;
                    default:
                        throw new IllegalStateException("Unexpected column type: " + d.columnType);
                }
            }
        }
    }

    return inputSchema.newSchema(newMeta);
}
 
Example 14
Source File: RemoveColumnsTransform.java    From DataVec with Apache License 2.0
@Override
public Schema transform(Schema schema) {
    int nToRemove = columnsToRemove.length;
    int newNumColumns = schema.numColumns() - nToRemove;
    if (newNumColumns <= 0)
        throw new IllegalStateException("Number of columns after executing operation is " + newNumColumns
                        + " (is <= 0). " + "origColumns = " + schema.getColumnNames() + ", toRemove = "
                        + Arrays.toString(columnsToRemove));

    List<String> origNames = schema.getColumnNames();
    List<ColumnMetaData> origMeta = schema.getColumnMetaData();

    Set<String> set = new HashSet<>();
    Collections.addAll(set, columnsToRemove);


    List<ColumnMetaData> newMeta = new ArrayList<>(newNumColumns);

    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> metaIter = origMeta.iterator();

    while (namesIter.hasNext()) {
        String n = namesIter.next();
        ColumnMetaData t = metaIter.next();
        if (!set.contains(n)) {
            newMeta.add(t);
        }
    }

    return schema.newSchema(newMeta);
}
 
Example 15
Source File: StringListToCategoricalSetTransform.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {

    int colIdx = inputSchema.getIndexOfColumn(columnName);

    List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size() + newColumnNames.size() - 1);
    List<String> oldNames = inputSchema.getColumnNames();

    Iterator<ColumnMetaData> typesIter = oldMeta.iterator();
    Iterator<String> namesIter = oldNames.iterator();

    int i = 0;
    while (typesIter.hasNext()) {
        ColumnMetaData t = typesIter.next();
        String name = namesIter.next();
        if (i++ == colIdx) {
            //Replace String column with a set of binary/categorical columns
            if (t.getColumnType() != ColumnType.String)
                throw new IllegalStateException("Cannot convert non-string type");

            for (int j = 0; j < newColumnNames.size(); j++) {
                ColumnMetaData meta = new CategoricalMetaData(newColumnNames.get(j), "true", "false");
                newMeta.add(meta);
            }
        } else {
            newMeta.add(t);
        }
    }

    return inputSchema.newSchema(newMeta);

}
 
Example 16
Source File: CategoricalToOneHotTransform.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema schema) {
    List<String> origNames = schema.getColumnNames();
    List<ColumnMetaData> origMeta = schema.getColumnMetaData();

    int i = 0;
    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> typesIter = origMeta.iterator();

    List<ColumnMetaData> newMeta = new ArrayList<>(schema.numColumns());

    while (namesIter.hasNext()) {
        String s = namesIter.next();
        ColumnMetaData t = typesIter.next();

        if (i++ == columnIdx) {
            //Convert this to one-hot:
            for (String stateName : stateNames) {
                String newName = s + "[" + stateName + "]";
                newMeta.add(new IntegerMetaData(newName, 0, 1));
            }
        } else {
            newMeta.add(t);
        }
    }

    return schema.newSchema(newMeta);
}
 
Example 17
Source File: ReduceSequenceByWindowTransform.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    if (inputSchema != null && !(inputSchema instanceof SequenceSchema)) {
        throw new IllegalArgumentException("Invalid input: input schema must be a SequenceSchema");
    }

    //Some window functions may make changes to the schema (adding window start/end times, for example)
    inputSchema = windowFunction.transform(inputSchema);

    //Approach here: The reducer gives us a schema for one time step -> simply convert this to a sequence schema...
    Schema oneStepSchema = reducer.transform(inputSchema);
    List<ColumnMetaData> meta = oneStepSchema.getColumnMetaData();

    return new SequenceSchema(meta);
}
 
Example 18
Source File: PivotTransform.java    From DataVec with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    if (!inputSchema.hasColumn(keyColumn) || !inputSchema.hasColumn(valueColumn)) {
        throw new UnsupportedOperationException("Key or value column not found: " + keyColumn + ", " + valueColumn
                        + " in " + inputSchema.getColumnNames());
    }

    List<String> origNames = inputSchema.getColumnNames();
    List<ColumnMetaData> origMeta = inputSchema.getColumnMetaData();

    int i = 0;
    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> typesIter = origMeta.iterator();

    List<ColumnMetaData> newMeta = new ArrayList<>(inputSchema.numColumns());

    int idxKey = inputSchema.getIndexOfColumn(keyColumn);
    int idxValue = inputSchema.getIndexOfColumn(valueColumn);

    ColumnMetaData valueMeta = inputSchema.getMetaData(idxValue);

    while (namesIter.hasNext()) {
        String s = namesIter.next();
        ColumnMetaData t = typesIter.next();

        if (i == idxKey) {
            //Convert this to a set of separate columns
            List<String> stateNames = ((CategoricalMetaData) inputSchema.getMetaData(idxKey)).getStateNames();
            for (String stateName : stateNames) {
                String newName = s + "[" + stateName + "]";

                ColumnMetaData newValueMeta = valueMeta.clone();
                newValueMeta.setName(newName);

                newMeta.add(newValueMeta);
            }
        } else if (i == idxValue) {
            i++;
            continue; //Skip column
        } else {
            newMeta.add(t);
        }
        i++;
    }

    //Infer the default value if necessary
    if (defaultValue == null) {
        switch (valueMeta.getColumnType()) {
            case String:
                defaultValue = new Text("");
                break;
            case Integer:
                defaultValue = new IntWritable(0);
                break;
            case Long:
                defaultValue = new LongWritable(0);
                break;
            case Double:
                defaultValue = new DoubleWritable(0.0);
                break;
            case Float:
                defaultValue = new FloatWritable(0.0f);
                break;
            case Categorical:
                defaultValue = new NullWritable();
                break;
            case Time:
                defaultValue = new LongWritable(0);
                break;
            case Bytes:
                throw new UnsupportedOperationException("Cannot infer default value for bytes");
            case Boolean:
                defaultValue = new Text("false");
                break;
            default:
                throw new UnsupportedOperationException(
                                "Cannot infer default value for " + valueMeta.getColumnType());
        }
    }

    return inputSchema.newSchema(newMeta);
}
 
Example 19
Source File: NDArrayDistanceTransform.java    From DataVec with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    //Copy the existing column metadata and append a Double column for the computed distance
    List<ColumnMetaData> newMeta = new ArrayList<>(inputSchema.getColumnMetaData());
    newMeta.add(new DoubleMetaData(newColumnName));
    return inputSchema.newSchema(newMeta);
}
 
Example 20
Source File: PivotTransform.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    if (!inputSchema.hasColumn(keyColumn) || !inputSchema.hasColumn(valueColumn)) {
        throw new UnsupportedOperationException("Key or value column not found: " + keyColumn + ", " + valueColumn
                        + " in " + inputSchema.getColumnNames());
    }

    List<String> origNames = inputSchema.getColumnNames();
    List<ColumnMetaData> origMeta = inputSchema.getColumnMetaData();

    int i = 0;
    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> typesIter = origMeta.iterator();

    List<ColumnMetaData> newMeta = new ArrayList<>(inputSchema.numColumns());

    int idxKey = inputSchema.getIndexOfColumn(keyColumn);
    int idxValue = inputSchema.getIndexOfColumn(valueColumn);

    ColumnMetaData valueMeta = inputSchema.getMetaData(idxValue);

    while (namesIter.hasNext()) {
        String s = namesIter.next();
        ColumnMetaData t = typesIter.next();

        if (i == idxKey) {
            //Convert this to a set of separate columns
            List<String> stateNames = ((CategoricalMetaData) inputSchema.getMetaData(idxKey)).getStateNames();
            for (String stateName : stateNames) {
                String newName = s + "[" + stateName + "]";

                ColumnMetaData newValueMeta = valueMeta.clone();
                newValueMeta.setName(newName);

                newMeta.add(newValueMeta);
            }
        } else if (i == idxValue) {
            i++;
            continue; //Skip column
        } else {
            newMeta.add(t);
        }
        i++;
    }

    //Infer the default value if necessary
    if (defaultValue == null) {
        switch (valueMeta.getColumnType()) {
            case String:
                defaultValue = new Text("");
                break;
            case Integer:
                defaultValue = new IntWritable(0);
                break;
            case Long:
                defaultValue = new LongWritable(0);
                break;
            case Double:
                defaultValue = new DoubleWritable(0.0);
                break;
            case Float:
                defaultValue = new FloatWritable(0.0f);
                break;
            case Categorical:
                defaultValue = new NullWritable();
                break;
            case Time:
                defaultValue = new LongWritable(0);
                break;
            case Bytes:
                throw new UnsupportedOperationException("Cannot infer default value for bytes");
            case Boolean:
                defaultValue = new Text("false");
                break;
            default:
                throw new UnsupportedOperationException(
                                "Cannot infer default value for " + valueMeta.getColumnType());
        }
    }

    return inputSchema.newSchema(newMeta);
}
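
Finally, a minimal sketch (again with hypothetical column names, assuming only the standard DataVec TransformProcess API) of how transforms such as the ones above are typically chained. Each step's transform(Schema) implementation is used to derive the schema after that step, and getColumnMetaData() can then be called on the final schema.

import java.util.List;

import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.metadata.ColumnMetaData;
import org.datavec.api.transform.schema.Schema;

public class TransformProcessSchemaSketch {
    public static void main(String[] args) {
        //Hypothetical input schema, for illustration only
        Schema inputSchema = new Schema.Builder()
                .addColumnString("name")
                .addColumnCategorical("color", "red", "green", "blue")
                .addColumnDouble("value")
                .build();

        //Each transform's transform(Schema) method (as shown in the examples above)
        //is applied in turn to compute the schema after every step
        TransformProcess tp = new TransformProcess.Builder(inputSchema)
                .categoricalToOneHot("color")
                .removeColumns("name")
                .build();

        Schema outputSchema = tp.getFinalSchema();

        //Inspect the column metadata of the derived output schema
        List<ColumnMetaData> meta = outputSchema.getColumnMetaData();
        for (ColumnMetaData m : meta) {
            System.out.println(m.getName() + " -> " + m.getColumnType());
        }
    }
}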