Java Code Examples for org.datavec.api.transform.schema.Schema#getIndexOfColumn()

The following examples show how to use org.datavec.api.transform.schema.Schema#getIndexOfColumn(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CategoricalToIntegerTransform.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the input schema, caches the index of {@code columnName}, and builds the
 * lookup from each categorical state name to its position in the state list.
 *
 * @param inputSchema schema of the data this transform will be applied to
 * @throws IllegalStateException if the metadata for {@code columnName} is not categorical
 */
@Override
public void setInputSchema(Schema inputSchema) {
    super.setInputSchema(inputSchema);

    columnIdx = inputSchema.getIndexOfColumn(columnName);
    ColumnMetaData meta = inputSchema.getMetaData(columnName);
    if (!(meta instanceof CategoricalMetaData)) {
        // This transform maps categories to integers, so the message names that conversion
        // (rather than "one-hot", which belongs to the sibling one-hot transform).
        throw new IllegalStateException("Cannot convert column \"" + columnName
                        + "\" from categorical to integer: column is not categorical (is: " + meta.getColumnType()
                        + ")");
    }
    this.stateNames = ((CategoricalMetaData) meta).getStateNames();

    // Each state name maps to its index in stateNames
    this.statesMap = new HashMap<>(stateNames.size());
    for (int i = 0; i < stateNames.size(); i++) {
        this.statesMap.put(stateNames.get(i), i);
    }
}
 
Example 2
Source File: CategoricalToOneHotTransform.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the input schema, records the index of {@code columnName}, and indexes the
 * column's categorical state names by their position in the state list.
 *
 * @param inputSchema schema of the data this transform will be applied to
 * @throws IllegalStateException if the metadata for {@code columnName} is not categorical
 */
@Override
public void setInputSchema(Schema inputSchema) {
    super.setInputSchema(inputSchema);

    columnIdx = inputSchema.getIndexOfColumn(columnName);

    ColumnMetaData meta = inputSchema.getMetaData(columnName);
    boolean categorical = meta instanceof CategoricalMetaData;
    if (!categorical) {
        String message = "Cannot convert column \"" + columnName
                        + "\" from categorical to one-hot: column is not categorical (is: "
                        + meta.getColumnType() + ")";
        throw new IllegalStateException(message);
    }

    CategoricalMetaData categoricalMeta = (CategoricalMetaData) meta;
    this.stateNames = categoricalMeta.getStateNames();

    // Map every state name to its position in the state list
    final int stateCount = stateNames.size();
    this.statesMap = new HashMap<>(stateCount);
    for (int position = 0; position < stateCount; position++) {
        this.statesMap.put(stateNames.get(position), position);
    }
}
 
Example 3
Source File: CategoricalToOneHotTransform.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the input schema, records the index of {@code columnName}, and indexes the
 * column's categorical state names by their position in the state list.
 *
 * @param inputSchema schema of the data this transform will be applied to
 * @throws IllegalStateException if the metadata for {@code columnName} is not categorical
 */
@Override
public void setInputSchema(Schema inputSchema) {
    super.setInputSchema(inputSchema);

    columnIdx = inputSchema.getIndexOfColumn(columnName);

    ColumnMetaData meta = inputSchema.getMetaData(columnName);
    boolean categorical = meta instanceof CategoricalMetaData;
    if (!categorical) {
        String message = "Cannot convert column \"" + columnName
                        + "\" from categorical to one-hot: column is not categorical (is: "
                        + meta.getColumnType() + ")";
        throw new IllegalStateException(message);
    }

    CategoricalMetaData categoricalMeta = (CategoricalMetaData) meta;
    this.stateNames = categoricalMeta.getStateNames();

    // Map every state name to its position in the state list
    final int stateCount = stateNames.size();
    this.statesMap = new HashMap<>(stateCount);
    for (int position = 0; position < stateCount; position++) {
        this.statesMap.put(stateNames.get(position), position);
    }
}
 
Example 4
Source File: FilterInvalidValues.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the schema and, when {@code filterAnyInvalid} is false, resolves the index of
 * every column listed in {@code columnsToFilterIfInvalid}.
 *
 * @param schema schema of the data this filter will be applied to
 */
@Override
public void setInputSchema(Schema schema) {
    this.schema = schema;
    if (filterAnyInvalid) {
        // No per-column indices are resolved in this mode
        return;
    }

    final int columnCount = columnsToFilterIfInvalid.length;
    this.columnIdxs = new int[columnCount];
    for (int pos = 0; pos < columnCount; pos++) {
        this.columnIdxs[pos] = schema.getIndexOfColumn(columnsToFilterIfInvalid[pos]);
    }
}
 
Example 5
Source File: ConditionalCopyValueTransform.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that both the column to replace and the source column exist, caches their
 * indices, and forwards the schema to the condition.
 *
 * @param inputSchema schema of the data this transform will be applied to
 * @throws IllegalStateException if either column is missing from the schema
 */
@Override
public void setInputSchema(Schema inputSchema) {
    boolean hasTarget = inputSchema.hasColumn(columnToReplace);
    if (!hasTarget) {
        throw new IllegalStateException("Column \"" + columnToReplace + "\" not found in input schema");
    }

    boolean hasSource = inputSchema.hasColumn(sourceColumn);
    if (!hasSource) {
        throw new IllegalStateException("Column \"" + sourceColumn + "\" not found in input schema");
    }

    // Both columns are present: resolve their positions, then hand the schema to the condition
    columnToReplaceIdx = inputSchema.getIndexOfColumn(columnToReplace);
    sourceColumnIdx = inputSchema.getIndexOfColumn(sourceColumn);
    condition.setInputSchema(inputSchema);
}
 
Example 6
Source File: SequenceSplitTimeSeparation.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Validates that {@code timeColumn} exists and has type {@code ColumnType.Time}, then
 * caches its index and stores the schema.
 *
 * @param inputSchema schema of the sequence data to be split
 * @throws IllegalStateException if the column is missing or is not a time column
 */
@Override
public void setInputSchema(Schema inputSchema) {
    if (!inputSchema.hasColumn(timeColumn)) {
        throw new IllegalStateException(
                        "Invalid state: schema does not have column with name \"" + timeColumn + "\"");
    }

    ColumnType actualType = inputSchema.getMetaData(timeColumn).getColumnType();
    if (actualType != ColumnType.Time) {
        String message = "Invalid input schema: schema column \"" + timeColumn
                        + "\" is not a time column. (Is type: " + actualType + ")";
        throw new IllegalStateException(message);
    }

    this.timeColumnIdx = inputSchema.getIndexOfColumn(timeColumn);
    this.schema = inputSchema;
}
 
Example 7
Source File: StringListToCountsNDArrayTransform.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the output schema: the column named by {@code columnName} is replaced with a
 * single NDArray column named {@code newColumnName} whose shape is
 * {@code [vocabulary.size()]}; every other column's metadata is passed through unchanged.
 *
 * @param inputSchema schema of the data before this transform
 * @return schema created via {@code inputSchema.newSchema(...)} from the updated metadata
 * @throws IllegalStateException if the column to replace is not of type String
 */
@Override
public Schema transform(Schema inputSchema) {
    int colIdx = inputSchema.getIndexOfColumn(columnName);

    List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData();
    // One column is swapped for one column, so the output has as many entries as the input
    List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size());

    int i = 0;
    for (ColumnMetaData t : oldMeta) {
        if (i++ == colIdx) {
            //Replace the String column with a single NDArray column
            if (t.getColumnType() != ColumnType.String) {
                throw new IllegalStateException("Cannot convert non-string type");
            }

            newMeta.add(new NDArrayMetaData(newColumnName, new long[] {vocabulary.size()}));
        } else {
            newMeta.add(t);
        }
    }

    return inputSchema.newSchema(newMeta);
}
 
Example 8
Source File: FirstDigitTransform.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the schema via the superclass and caches the index of {@code inputColumn},
 * failing the state check if that index is negative.
 *
 * @param schema schema of the data this transform will be applied to
 */
@Override
public void setInputSchema(Schema schema){
    super.setInputSchema(schema);

    final int resolvedIdx = schema.getIndexOfColumn(inputColumn);
    columnIdx = resolvedIdx;
    Preconditions.checkState(resolvedIdx >= 0, "Input column \"%s\" not found in schema", inputColumn);
}
 
Example 9
Source File: SequenceMovingWindowReduceTransform.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the output schema: all input columns are kept and one extra column named
 * {@code newColumnName} is appended, whose metadata type depends on {@code op}.
 *
 * @param inputSchema schema of the data before this transform
 * @return a new {@code SequenceSchema} containing the input metadata plus the new column
 * @throws UnsupportedOperationException if {@code op} is not one of the handled operations
 */
@Override
public Schema transform(Schema inputSchema) {
    final int sourceIdx = inputSchema.getIndexOfColumn(columnName);

    // Start from a copy of the input columns; the reduced column is appended at the end
    List<ColumnMetaData> inputMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> outputMeta = new ArrayList<>(inputMeta);

    final ColumnMetaData reduced;
    switch (op) {
        // Result keeps the type of the source column: clone its metadata and rename it
        case Min:
        case Max:
        case Range:
        case TakeFirst:
        case TakeLast:
            reduced = inputMeta.get(sourceIdx).clone();
            reduced.setName(newColumnName);
            break;
        // Result is a double
        case Prod:
        case Sum:
        case Mean:
        case Stdev:
            reduced = new DoubleMetaData(newColumnName);
            break;
        // Result is an integer
        case Count:
        case CountUnique:
            reduced = new IntegerMetaData(newColumnName);
            break;
        default:
            throw new UnsupportedOperationException("Unknown op type: " + op);
    }
    outputMeta.add(reduced);

    return new SequenceSchema(outputMeta);
}
 
Example 10
Source File: AnalyzeLocal.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Collect the distinct values found in a single column by reading every record.
 * For sequence data, use {@link #getUniqueSequence(List, Schema, SequenceRecordReader)}
 *
 * @param columnName    Name of the column to get unique values from
 * @param schema        Data schema, used to resolve the column's index
 * @param data          Record reader to consume; it is read until {@code hasNext()} is false
 * @return              Set of the distinct values read from the column
 */
public static Set<Writable> getUnique(String columnName, Schema schema, RecordReader data) {
    final int targetIdx = schema.getIndexOfColumn(columnName);

    Set<Writable> distinct = new HashSet<>();
    while (data.hasNext()) {
        distinct.add(data.next().get(targetIdx));
    }
    return distinct;
}
 
Example 11
Source File: StringListToCategoricalSetTransform.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the output schema: the column named by {@code columnName} is replaced with one
 * categorical column (states "true"/"false") per entry of {@code newColumnNames}; every
 * other column's metadata is passed through unchanged.
 *
 * @param inputSchema schema of the data before this transform
 * @return schema created via {@code inputSchema.newSchema(...)} from the updated metadata
 * @throws IllegalStateException if the column to replace is not of type String
 */
@Override
public Schema transform(Schema inputSchema) {
    int colIdx = inputSchema.getIndexOfColumn(columnName);

    List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData();
    // One column is removed and newColumnNames.size() columns are added in its place
    List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size() + newColumnNames.size() - 1);

    int i = 0;
    for (ColumnMetaData t : oldMeta) {
        if (i++ == colIdx) {
            //Replace String column with a set of binary/categorical columns
            if (t.getColumnType() != ColumnType.String) {
                throw new IllegalStateException("Cannot convert non-string type");
            }

            for (int j = 0; j < newColumnNames.size(); j++) {
                newMeta.add(new CategoricalMetaData(newColumnNames.get(j), "true", "false"));
            }
        } else {
            newMeta.add(t);
        }
    }

    return inputSchema.newSchema(newMeta);
}
 
Example 12
Source File: StringListToCategoricalSetTransform.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the input schema and caches the index of {@code columnName} within it.
 *
 * @param inputSchema schema of the data this transform will be applied to
 */
@Override
public void setInputSchema(Schema inputSchema) {
    // Keep the schema first, then resolve the column position from it
    this.inputSchema = inputSchema;
    columnIdx = inputSchema.getIndexOfColumn(columnName);
}
 
Example 13
Source File: PivotTransform.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the output schema for the pivot. The key column is expanded into one column per
 * categorical state, each a renamed clone of the value column's metadata with the name
 * {@code keyName[stateName]}; the value column itself is dropped; all other columns are
 * passed through. If {@code defaultValue} is null, a default is chosen from the value
 * column's type and stored in {@code defaultValue}.
 *
 * @param inputSchema schema of the data before this transform
 * @return schema created via {@code inputSchema.newSchema(...)} from the pivoted metadata
 * @throws UnsupportedOperationException if the key or value column is missing, or if no
 *         default value can be inferred for the value column's type
 */
@Override
public Schema transform(Schema inputSchema) {
    if (!(inputSchema.hasColumn(keyColumn) && inputSchema.hasColumn(valueColumn))) {
        throw new UnsupportedOperationException("Key or value column not found: " + keyColumn + ", " + valueColumn
                        + " in " + inputSchema.getColumnNames());
    }

    Iterator<String> nameCursor = inputSchema.getColumnNames().iterator();
    Iterator<ColumnMetaData> metaCursor = inputSchema.getColumnMetaData().iterator();

    List<ColumnMetaData> newMeta = new ArrayList<>(inputSchema.numColumns());

    final int idxKey = inputSchema.getIndexOfColumn(keyColumn);
    final int idxValue = inputSchema.getIndexOfColumn(valueColumn);

    final ColumnMetaData valueMeta = inputSchema.getMetaData(idxValue);

    for (int position = 0; nameCursor.hasNext(); position++) {
        String originalName = nameCursor.next();
        ColumnMetaData originalMeta = metaCursor.next();

        if (position == idxKey) {
            // Expand the key column: one value-typed column per categorical state
            List<String> stateNames = ((CategoricalMetaData) inputSchema.getMetaData(idxKey)).getStateNames();
            for (String stateName : stateNames) {
                ColumnMetaData pivoted = valueMeta.clone();
                pivoted.setName(originalName + "[" + stateName + "]");
                newMeta.add(pivoted);
            }
        } else if (position != idxValue) {
            // Neither key nor value: keep as-is (the value column is skipped entirely)
            newMeta.add(originalMeta);
        }
    }

    // Infer the default value if one has not been provided
    if (defaultValue == null) {
        switch (valueMeta.getColumnType()) {
            case String:
                defaultValue = new Text("");
                break;
            case Integer:
                defaultValue = new IntWritable(0);
                break;
            case Long:
            case Time:
                defaultValue = new LongWritable(0);
                break;
            case Double:
                defaultValue = new DoubleWritable(0.0);
                break;
            case Float:
                defaultValue = new FloatWritable(0.0f);
                break;
            case Categorical:
                defaultValue = new NullWritable();
                break;
            case Boolean:
                defaultValue = new Text("false");
                break;
            case Bytes:
                throw new UnsupportedOperationException("Cannot infer default value for bytes");
            default:
                throw new UnsupportedOperationException(
                                "Cannot infer default value for " + valueMeta.getColumnType());
        }
    }

    return inputSchema.newSchema(newMeta);
}
 
Example 14
Source File: BaseColumnComparator.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the given schema and caches the index of {@code columnName} within it.
 *
 * @param sequenceSchema schema to store and to resolve the column index against
 */
@Override
public void setSchema(Schema sequenceSchema) {
    // Keep the schema first, then resolve the column position from it
    schema = sequenceSchema;
    columnIdx = sequenceSchema.getIndexOfColumn(columnName);
}
 
Example 15
Source File: StringListToCountsNDArrayTransform.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the input schema and caches the index of {@code columnName} within it.
 *
 * @param inputSchema schema of the data this transform will be applied to
 */
@Override
public void setInputSchema(Schema inputSchema) {
    // Keep the schema first, then resolve the column position from it
    this.inputSchema = inputSchema;
    columnIdx = inputSchema.getIndexOfColumn(columnName);
}
 
Example 16
Source File: BaseColumnComparator.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the given schema and caches the index of {@code columnName} within it.
 *
 * @param sequenceSchema schema to store and to resolve the column index against
 */
@Override
public void setSchema(Schema sequenceSchema) {
    // Keep the schema first, then resolve the column position from it
    schema = sequenceSchema;
    columnIdx = sequenceSchema.getIndexOfColumn(columnName);
}
 
Example 17
Source File: AnalyzeSpark.java    From DataVec with Apache License 2.0 3 votes vote down vote up
/**
 * Take a sample of values from one column of the data, sampling without replacement.
 *
 * @param count         Number of values to sample
 * @param columnName    Name of the column to sample from
 * @param schema        Schema, used to resolve the column's index
 * @param data          Data to sample from
 * @return              A list of sampled values from the column
 */
public static List<Writable> sampleFromColumn(int count, String columnName, Schema schema,
                JavaRDD<List<Writable>> data) {
    final int targetIdx = schema.getIndexOfColumn(columnName);

    // Project each record down to the selected column, then sample (withReplacement = false)
    return data.map(new SelectColumnFunction(targetIdx)).takeSample(false, count);
}
 
Example 18
Source File: AnalyzeSpark.java    From deeplearning4j with Apache License 2.0 2 votes vote down vote up
/**
 * Collect the distinct values found in a single column.
 * For sequence data, use {@link #getUniqueSequence(List, Schema, JavaRDD)}
 *
 * @param columnName    Name of the column to get unique values from
 * @param schema        Data schema, used to resolve the column's index
 * @param data          Data to get unique values from
 * @return              List of the distinct values in the column
 */
public static List<Writable> getUnique(String columnName, Schema schema, JavaRDD<List<Writable>> data) {
    final int targetIdx = schema.getIndexOfColumn(columnName);

    // Project each record down to the selected column, de-duplicate, and collect the result
    return data.map(new SelectColumnFunction(targetIdx))
                    .distinct()
                    .collect();
}
 
Example 19
Source File: AnalyzeSpark.java    From DataVec with Apache License 2.0 2 votes vote down vote up
/**
 * Get the maximum value for the specified column, using the comparator that
 * {@code Comparators.forType} supplies for the column's writable type.
 *
 * @param allData    All data
 * @param columnName Name of the column to get the maximum value for
 * @param schema     Schema of the data
 * @return           Maximum value for the column
 */
public static Writable max(JavaRDD<List<Writable>> allData, String columnName, Schema schema){
    final int targetIdx = schema.getIndexOfColumn(columnName);

    // Project each record down to the selected column, then reduce with the type's comparator
    return allData.map(new SelectColumnFunction(targetIdx))
                    .max(Comparators.forType(schema.getType(columnName).getWritableType()));
}
 
Example 20
Source File: AnalyzeSpark.java    From DataVec with Apache License 2.0 2 votes vote down vote up
/**
 * Collect the distinct values found in a single column.
 * For sequence data, use {@link #getUniqueSequence(List, Schema, JavaRDD)}
 *
 * @param columnName    Name of the column to get unique values from
 * @param schema        Data schema, used to resolve the column's index
 * @param data          Data to get unique values from
 * @return              List of the distinct values in the column
 */
public static List<Writable> getUnique(String columnName, Schema schema, JavaRDD<List<Writable>> data) {
    final int targetIdx = schema.getIndexOfColumn(columnName);

    // Project each record down to the selected column, de-duplicate, and collect the result
    return data.map(new SelectColumnFunction(targetIdx))
                    .distinct()
                    .collect();
}