Java Code Examples for org.datavec.api.transform.schema.Schema#Builder

The following examples show how to use org.datavec.api.transform.schema.Schema#Builder. You can go to the original project or source file by following the link above each example.
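
Before diving in, here is a minimal sketch of the pattern every example below shares: add typed columns to a Schema.Builder, then call build() to obtain an immutable Schema. The column names and types are illustrative only.

    Schema schema = new Schema.Builder()
            .addColumnString("name")      // free-form text column
            .addColumnInteger("age")      // integer column
            .addColumnDouble("score")     // double-precision column
            .build();

    System.out.println(schema.numColumns());     // 3
    System.out.println(schema.getColumnNames()); // [name, age, score]
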
Example 1
Source File: ArrowConverterTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testArrowBatchSet() {
    Schema.Builder schema = new Schema.Builder();
    for(int i = 0; i < 2; i++) {
        schema.addColumnInteger(String.valueOf(i));
    }

    List<List<Writable>> input = Arrays.asList(
            Arrays.<Writable>asList(new IntWritable(0),new IntWritable(1)),
            Arrays.<Writable>asList(new IntWritable(2),new IntWritable(3))
    );

    List<FieldVector> fieldVector = ArrowConverter.toArrowColumns(bufferAllocator,schema.build(),input);
    ArrowWritableRecordBatch writableRecordBatch = new ArrowWritableRecordBatch(fieldVector,schema.build());
    List<Writable> assertion = Arrays.<Writable>asList(new IntWritable(4), new IntWritable(5));
    writableRecordBatch.set(1, Arrays.<Writable>asList(new IntWritable(4),new IntWritable(5)));
    List<Writable> recordTest = writableRecordBatch.get(1);
    assertEquals(assertion,recordTest);
}
 
Example 2
Source File: RecordMapperTest.java    From DataVec with Apache License 2.0
private Triple<String,Schema,List<List<Writable>>> records() {
    List<List<Writable>> list = new ArrayList<>();
    StringBuilder sb = new StringBuilder();
    int numColumns = 3;
    for (int i = 0; i < 10; i++) {
        List<Writable> temp = new ArrayList<>();
        for (int j = 0; j < numColumns; j++) {
            int v = 100 * i + j;
            temp.add(new IntWritable(v));
            sb.append(v);
            if (j < 2)
                sb.append(",");
            else if (i != 9)
                sb.append("\n");
        }
        list.add(temp);
    }


    Schema.Builder schemaBuilder = new Schema.Builder();
    for(int i = 0; i < numColumns; i++) {
        schemaBuilder.addColumnInteger(String.valueOf(i));
    }

    return Triple.of(sb.toString(),schemaBuilder.build(),list);
}
 
Example 3
Source File: RecordMapperTest.java    From deeplearning4j with Apache License 2.0
private Triple<String,Schema,List<List<Writable>>> records() {
    List<List<Writable>> list = new ArrayList<>();
    StringBuilder sb = new StringBuilder();
    int numColumns = 3;
    for (int i = 0; i < 10; i++) {
        List<Writable> temp = new ArrayList<>();
        for (int j = 0; j < numColumns; j++) {
            int v = 100 * i + j;
            temp.add(new IntWritable(v));
            sb.append(v);
            if (j < 2)
                sb.append(",");
            else if (i != 9)
                sb.append("\n");
        }
        list.add(temp);
    }


    Schema.Builder schemaBuilder = new Schema.Builder();
    for(int i = 0; i < numColumns; i++) {
        schemaBuilder.addColumnInteger(String.valueOf(i));
    }

    return Triple.of(sb.toString(),schemaBuilder.build(),list);
}
 
Example 4
Source File: PythonUtils.java    From deeplearning4j with Apache License 2.0
/**
 * Create a {@link Schema}
 * from {@link PythonVariables}.
 * Types are mapped to types of the same name.
 *
 * @param input the input {@link PythonVariables}
 * @return the output {@link Schema}
 */
public static Schema fromPythonVariables(PythonVariables input) {
    Schema.Builder schemaBuilder = new Schema.Builder();
    Preconditions.checkState(input.getVariables() != null && input.getVariables().length > 0, "Input must have variables. Found none.");
    for (String varName: input.getVariables()) {

        switch (input.getType(varName).getName()) {
            case INT:
                schemaBuilder.addColumnInteger(varName);
                break;
            case STR:
                schemaBuilder.addColumnString(varName);
                break;
            case FLOAT:
                schemaBuilder.addColumnFloat(varName);
                break;
            case NDARRAY:
                schemaBuilder.addColumnNDArray(varName, null);
                break;
            case BOOL:
                schemaBuilder.addColumn(new BooleanMetaData(varName));
                break;
        }
    }

    return schemaBuilder.build();
}
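
A hedged usage sketch of the method above. It assumes the declaration-style add methods on PythonVariables (addInt, addStr) from datavec-python; the variable names are illustrative:

    PythonVariables vars = new PythonVariables();
    vars.addInt("epochs");       // becomes an integer column
    vars.addStr("modelName");    // becomes a string column

    Schema schema = PythonUtils.fromPythonVariables(vars);
    // schema now contains columns "epochs" (Integer) and "modelName" (String)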
 
Example 5
Source File: PmmlUtils.java    From konduit-serving with Apache License 2.0
/**
 * Extract the input {@link Schema} from the {@link DataDictionary}
 * of a {@link PMML} document, skipping predicted (output) fields.
 *
 * @param pmml the target {@link PMML} document
 * @return the equivalent input {@link Schema}
 */
public static Schema inputSchema(PMML pmml) {
    DataDictionary dataDictionary = pmml.getDataDictionary();
    Schema.Builder ret = new Schema.Builder();
    MiningSchema miningSchema = pmml.getModels().get(0).getMiningSchema();
    Set<FieldName> outputNames = new HashSet<>();
    //collect predicted (output) field names so they can be excluded from the input schema
    for (MiningField miningField : miningSchema.getMiningFields()) {
        if (miningField.getUsageType() == MiningField.UsageType.PREDICTED) {
            outputNames.add(miningField.getName());
        }
    }
    for (int i = 0; i < dataDictionary.getNumberOfFields(); i++) {
        String name = dataDictionary.getDataFields().get(i).getName().getValue();
        if (!outputNames.contains(dataDictionary.getDataFields().get(i).getName()))
            addDataTypeForSchema(dataDictionary.getDataFields().get(i).getDataType(), ret, name);

    }

    return ret.build();
}
 
Example 6
Source File: SchemaTypeUtilsTest.java    From konduit-serving with Apache License 2.0
@Test
public void testTypeMappingsForSchema() {
    Schema.Builder schemaBuilder = new Schema.Builder();
    schemaBuilder.addColumnInteger("int");
    schemaBuilder.addColumnLong("long");
    schemaBuilder.addColumnNDArray("ndarray",new long[]{1,1});
    schemaBuilder.addColumnString("string");
    schemaBuilder.addColumnCategorical("categorical","cat1","cat2");
    schemaBuilder.addColumnFloat("float");
    schemaBuilder.addColumnDouble("double");
    schemaBuilder.addColumnBoolean("boolean");

    Schema convert = schemaBuilder.build();
    final Map<String, SchemaType> result = SchemaTypeUtils.typeMappingsForSchema(convert);
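    // no assertions on 'result'; the test only verifies the mapping completes without throwing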

}
 
Example 7
Source File: ParseDoubleTransform.java    From DataVec with Apache License 2.0
/**
 * Get the output schema for this transformation, given an input schema.
 *
 * @param inputSchema the schema of the incoming records
 * @return the transformed schema, with string columns converted to double columns
 */
@Override
public Schema transform(Schema inputSchema) {
    Schema.Builder newSchema = new Schema.Builder();
    for (int i = 0; i < inputSchema.numColumns(); i++) {
        if (inputSchema.getType(i) == ColumnType.String) {
            newSchema.addColumnDouble(inputSchema.getMetaData(i).getName());
        } else
            newSchema.addColumn(inputSchema.getMetaData(i));

    }
    return newSchema.build();
}
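
For illustration, a small sketch of how this schema mapping behaves, assuming ParseDoubleTransform's no-arg constructor: string columns come back as double columns and everything else passes through untouched.

    Schema input = new Schema.Builder()
            .addColumnString("price")    // will be converted to a double column
            .addColumnInteger("qty")     // passed through unchanged
            .build();

    Schema output = new ParseDoubleTransform().transform(input);
    // output: "price" is now ColumnType.Double, "qty" is still ColumnType.Integer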
 
Example 8
Source File: TestTransforms.java    From deeplearning4j with Apache License 2.0
public static Schema getSchema(ColumnType type, String... colNames) {

    Schema.Builder schema = new Schema.Builder();

    switch (type) {
        case String:
            schema.addColumnString("column");
            break;
        case Integer:
            schema.addColumnInteger("column");
            break;
        case Long:
            schema.addColumnLong("column");
            break;
        case Double:
            schema.addColumnDouble("column");
            break;
        case Float:
            schema.addColumnFloat("column");
            break;
        case Categorical:
            schema.addColumnCategorical("column", colNames);
            break;
        case Time:
            schema.addColumnTime("column", DateTimeZone.UTC);
            break;
        default:
            throw new RuntimeException();
    }
    return schema.build();
}
 
Example 9
Source File: PythonStepRunner.java    From konduit-serving with Apache License 2.0
protected Schema schemaForVariables(PythonVariables pythonVariables) {
    Schema.Builder schemaBuilder = new Schema.Builder();
    String[] varNames = pythonVariables.getVariables();
    for (String name : varNames) {
        PythonType pyType = pythonVariables.getType(name);
        switch (pyType.getName()) {
            case INT:
                schemaBuilder.addColumnLong(name);
                break;
            case FLOAT:
                schemaBuilder.addColumnDouble(name);
                break;
            case STR:
            case DICT:
            case LIST:
                schemaBuilder.addColumnString(name);
                break;
            case NDARRAY:
                INDArray arr = pythonVariables.getNDArrayValue(name);
                if (arr == null)
                    schemaBuilder.addColumnNDArray(name, new long[]{1, 1});
                else
                    schemaBuilder.addColumnNDArray(name, arr.shape());
                break;
            case BOOL:
                schemaBuilder.addColumnBoolean(name);
                break;
            default:
                throw new IllegalStateException("Unable to support type " + pyType.getName().name());
        }
    }

    return schemaBuilder.build();
}
 
Example 10
Source File: ArrowUtils.java    From konduit-serving with Apache License 2.0
public static Schema toDatavecSchema(org.apache.arrow.vector.types.pojo.Schema schema) {
    Schema.Builder schemaBuilder = new Schema.Builder();

    for (int i = 0; i < schema.getFields().size(); ++i) {
        schemaBuilder.addColumn(metaDataFromField(schema.getFields().get(i)));
    }

    return schemaBuilder.build();
}
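
A brief, hedged sketch of driving this converter with an Arrow schema built via Arrow's pojo factories (Field.nullable and the ArrowType constructors are assumed from the Arrow Java API; the field names are illustrative):

    org.apache.arrow.vector.types.pojo.Schema arrowSchema =
            new org.apache.arrow.vector.types.pojo.Schema(Arrays.asList(
                    Field.nullable("id", new ArrowType.Int(32, true)),  // signed 32-bit integer
                    Field.nullable("label", new ArrowType.Utf8())));    // UTF-8 string

    Schema datavecSchema = ArrowUtils.toDatavecSchema(arrowSchema);
    // one DataVec column per Arrow field, with types mapped by metaDataFromField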
 
Example 11
Source File: NormalizationTests.java    From DataVec with Apache License 2.0
@Test
public void testMeanStdZeros() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));

    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }

    }

    INDArray arr = RecordConverter.toMatrix(data);

    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    DataRowsFacade dataFrame = DataFrames.toDataFrame(schema, rdd);

    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));
    List<Row> rows = Normalization.stdDevMeanColumns(dataFrame, dataFrame.get().columns());
    INDArray assertion = DataFrames.toMatrix(rows);
    //compare standard deviation
    assertTrue(standardScaler.getStd().equalsWithEps(assertion.getRow(0), 1e-1));
    //compare mean
    assertTrue(standardScaler.getMean().equalsWithEps(assertion.getRow(1), 1e-1));

}
 
Example 12
Source File: TokenizerBagOfWordsTermSequenceIndexTransform.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    Schema.Builder newSchema = new Schema.Builder();
    for(int i = 0; i < inputSchema.numColumns(); i++) {
        if(inputSchema.getName(i).equals(this.columnName)) {
            newSchema.addColumnNDArray(newColumName,new long[]{1,wordIndexMap.size()});
        }
        else {
            newSchema.addColumn(inputSchema.getMetaData(i));
        }
    }

    return newSchema.build();
}
 
Example 13
Source File: ArrowConverterTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testArrowColumnINDArray() {
    Schema.Builder schema = new Schema.Builder();
    int numCols = 2;
    INDArray arr = Nd4j.linspace(1,4,4);
    for(int i = 0; i < numCols; i++) {
        schema.addColumnNDArray(String.valueOf(i),new long[]{1,4});
    }

    Schema buildSchema = schema.build();
    List<List<Writable>> list = new ArrayList<>();
    List<Writable> firstRow = new ArrayList<>();
    for(int i = 0 ; i < numCols; i++) {
        firstRow.add(new NDArrayWritable(arr));
    }

    list.add(firstRow);

    List<FieldVector> fieldVectors = ArrowConverter.toArrowColumns(bufferAllocator, buildSchema, list);
    assertEquals(numCols,fieldVectors.size());
    assertEquals(1,fieldVectors.get(0).getValueCount());
    assertFalse(fieldVectors.get(0).isNull(0));

    ArrowWritableRecordBatch arrowWritableRecordBatch = ArrowConverter.toArrowWritables(fieldVectors, buildSchema);
    assertEquals(1,arrowWritableRecordBatch.size());

    Writable writable = arrowWritableRecordBatch.get(0).get(0);
    assertTrue(writable instanceof NDArrayWritable);
    NDArrayWritable ndArrayWritable = (NDArrayWritable) writable;
    assertEquals(arr,ndArrayWritable.get());

    Writable writable1 = ArrowConverter.fromEntry(0, fieldVectors.get(0), ColumnType.NDArray);
    NDArrayWritable ndArrayWritablewritable1 = (NDArrayWritable) writable1;
    System.out.println(ndArrayWritablewritable1.get());

}
 
Example 14
Source File: ArrowConverterTest.java    From DataVec with Apache License 2.0
@Test
public void testArrowColumnString() {
    Schema.Builder schema = new Schema.Builder();
    List<String> single = new ArrayList<>();
    for(int i = 0; i < 2; i++) {
        schema.addColumnInteger(String.valueOf(i));
        single.add(String.valueOf(i));
    }


    List<FieldVector> fieldVectors = ArrowConverter.toArrowColumnsStringSingle(bufferAllocator, schema.build(), single);
    List<List<Writable>> records = ArrowConverter.toArrowWritables(fieldVectors, schema.build());
    List<List<Writable>> assertion = new ArrayList<>();
    assertion.add(Arrays.<Writable>asList(new IntWritable(0),new IntWritable(1)));
    assertEquals(assertion,records);

    List<List<String>> batch = new ArrayList<>();
    for(int i = 0; i < 2; i++) {
        batch.add(Arrays.asList(String.valueOf(i),String.valueOf(i)));
    }

    List<FieldVector> fieldVectorsBatch = ArrowConverter.toArrowColumnsString(bufferAllocator, schema.build(), batch);
    List<List<Writable>> batchRecords = ArrowConverter.toArrowWritables(fieldVectorsBatch, schema.build());

    List<List<Writable>> assertionBatch = new ArrayList<>();
    assertionBatch.add(Arrays.<Writable>asList(new IntWritable(0),new IntWritable(0)));
    assertionBatch.add(Arrays.<Writable>asList(new IntWritable(1),new IntWritable(1)));
    assertEquals(assertionBatch,batchRecords);


}
 
Example 15
Source File: NormalizationTests.java    From DataVec with Apache License 2.0
@Test
public void normalizationTests() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));

    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }

    }

    INDArray arr = RecordConverter.toMatrix(data);

    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    assertEquals(schema, DataFrames.fromStructType(DataFrames.fromSchema(schema)));
    assertEquals(rdd.collect(), DataFrames.toRecords(DataFrames.toDataFrame(schema, rdd)).getSecond().collect());

    DataRowsFacade dataFrame = DataFrames.toDataFrame(schema, rdd);
    dataFrame.get().show();
    Normalization.zeromeanUnitVariance(dataFrame).get().show();
    Normalization.normalize(dataFrame).get().show();

    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));

    INDArray zeroMeanUnitVarianceDataFrame =
                    RecordConverter.toMatrix(Normalization.zeromeanUnitVariance(schema, rdd).collect());
    INDArray zeroMeanUnitVarianceDataFrameZeroToOne =
                    RecordConverter.toMatrix(Normalization.normalize(schema, rdd).collect());
    assertEquals(standardScalered, zeroMeanUnitVarianceDataFrame);
    assertTrue(zeroToOnes.equalsWithEps(zeroMeanUnitVarianceDataFrameZeroToOne, 1e-1));

}
 
Example 16
Source File: NormalizationTests.java    From deeplearning4j with Apache License 2.0
@Test
public void normalizationTests() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));

    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }

    }

    INDArray arr = RecordConverter.toMatrix(DataType.DOUBLE, data);

    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    assertEquals(schema, DataFrames.fromStructType(DataFrames.fromSchema(schema)));
    assertEquals(rdd.collect(), DataFrames.toRecords(DataFrames.toDataFrame(schema, rdd)).getSecond().collect());

    Dataset<Row> dataFrame = DataFrames.toDataFrame(schema, rdd);
    dataFrame.show();
    Normalization.zeromeanUnitVariance(dataFrame).show();
    Normalization.normalize(dataFrame).show();

    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));

    INDArray zeroMeanUnitVarianceDataFrame =
                    RecordConverter.toMatrix(DataType.DOUBLE, Normalization.zeromeanUnitVariance(schema, rdd).collect());
    INDArray zeroMeanUnitVarianceDataFrameZeroToOne =
                    RecordConverter.toMatrix(DataType.DOUBLE, Normalization.normalize(schema, rdd).collect());
    assertEquals(standardScalered, zeroMeanUnitVarianceDataFrame);
    assertTrue(zeroToOnes.equalsWithEps(zeroMeanUnitVarianceDataFrameZeroToOne, 1e-1));

}
 
Example 17
Source File: SchemaTypeUtilsTest.java    From konduit-serving with Apache License 2.0
@Test
public void testToSchema() {
    SchemaType[] values = SchemaType.values();
    Schema.Builder schemaBuilder = new Schema.Builder();

    final List<String> names = new ArrayList<>();
    for(SchemaType value : values) {
        names.add(value.name());
        switch(value) {
            case NDArray:
                schemaBuilder.addColumnNDArray(value.name(),new long[]{1,1});
                break;
            case Boolean:
                schemaBuilder.addColumnBoolean(value.name());
                break;
            case Float:
                schemaBuilder.addColumnFloat(value.name());
                break;
            case Double:
                schemaBuilder.addColumnDouble(value.name());
                break;
            case Image:
                BinaryMetaData binaryMetaDataImage = new BinaryMetaData(value.name());
                schemaBuilder.addColumn(binaryMetaDataImage);
                break;
            case Integer:
                schemaBuilder.addColumnInteger(value.name());
                break;
            case String:
                schemaBuilder.addColumnString(value.name());
                break;
            case Time:
                schemaBuilder.addColumnTime(value.name(),TimeZone.getDefault());
                break;
            case Categorical:
                schemaBuilder.addColumnCategorical(value.name());
                break;
            case Bytes:
                BinaryMetaData binaryMetaData = new BinaryMetaData(value.name());
                schemaBuilder.addColumn(binaryMetaData);
                break;
            case Long:
                schemaBuilder.addColumnLong(value.name());
                break;

        }
    }


    Schema expected = schemaBuilder.build();
    // Run the test
    final Schema result = SchemaTypeUtils.toSchema(values, names);

    // Verify the results
    assertEquals(expected, result);
}
 
Example 18
Source File: DataFramesTests.java    From DataVec with Apache License 2.0
@Test
public void testDataFrameConversions() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));

    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }

    }

    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    assertEquals(schema, DataFrames.fromStructType(DataFrames.fromSchema(schema)));
    assertEquals(rdd.collect(), DataFrames.toRecords(DataFrames.toDataFrame(schema, rdd)).getSecond().collect());

    DataRowsFacade dataFrame = DataFrames.toDataFrame(schema, rdd);
    dataFrame.get().show();
    Column mean = DataFrames.mean(dataFrame, "0");
    Column std = DataFrames.std(dataFrame, "0");
    dataFrame.get().withColumn("0", dataFrame.get().col("0").minus(mean)).show();
    dataFrame.get().withColumn("0", dataFrame.get().col("0").divide(std)).show();

    /*   DataFrame desc = dataFrame.describe(dataFrame.columns());
    dataFrame.show();
    System.out.println(dataFrame.agg(avg("0"), dataFrame.col("0")));
    dataFrame.withColumn("0",dataFrame.col("0").minus(avg(dataFrame.col("0"))));
    dataFrame.show();
    
    
    for(String column : dataFrame.columns()) {
        System.out.println(DataFrames.mean(desc,column));
        System.out.println(DataFrames.min(desc,column));
        System.out.println(DataFrames.max(desc,column));
        System.out.println(DataFrames.std(desc,column));
    
    }*/
}
 
Example 19
Source File: SchemaTypeUtils.java    From konduit-serving with Apache License 2.0 4 votes vote down vote up
/**
 * Create a {@link Schema} from the given {@link SchemaType}s and column names.
 * Note that exceptions are thrown when the types are null, the names are null,
 * or the two arguments are not the same length.
 *
 * @param types the type of each column
 * @param names the name of each column
 * @return the equivalent {@link Schema} given the types and names
 */
public static Schema toSchema(SchemaType[] types, List<String> names) {
    Preconditions.checkNotNull(types, "Please specify types");
    Preconditions.checkNotNull(names, "Please specify names.");
    Preconditions.checkState(types.length == names.size(), "Types and names must be the same length");
    Schema.Builder builder = new Schema.Builder();
    for (int i = 0; i < types.length; i++) {
        Preconditions.checkNotNull(types[i], "Type " + i + " was null!");
        switch (types[i]) {
            case NDArray:
                builder.addColumnNDArray(names.get(i), new long[]{1, 1});
                break;
            case String:
                builder.addColumnString(names.get(i));
                break;
            case Boolean:
                builder.addColumnBoolean(names.get(i));
                break;
            case Categorical:
                builder.addColumnCategorical(names.get(i));
                break;
            case Float:
                builder.addColumnFloat(names.get(i));
                break;
            case Double:
                builder.addColumnDouble(names.get(i));
                break;
            case Integer:
                builder.addColumnInteger(names.get(i));
                break;
            case Long:
                builder.addColumnLong(names.get(i));
                break;
            case Bytes:
            case Image:
                BinaryMetaData binaryMetaData = new BinaryMetaData(names.get(i));
                builder.addColumn(binaryMetaData);
                break;
            case Time:
                builder.addColumnTime(names.get(i),TimeZone.getDefault());
                break;
            default:
                throw new UnsupportedOperationException("Unknown type " + types[i]);

        }
    }

    return builder.build();
}
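
A minimal usage sketch of toSchema, pairing each SchemaType with a column name (the names here are illustrative):

    Schema schema = SchemaTypeUtils.toSchema(
            new SchemaType[]{SchemaType.Integer, SchemaType.String, SchemaType.Double},
            Arrays.asList("id", "label", "score"));
    // yields a 3-column schema: "id" (Integer), "label" (String), "score" (Double)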
 
Example 20
Source File: DataFramesTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testDataFrameConversions() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));

    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }

    }

    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    assertEquals(schema, DataFrames.fromStructType(DataFrames.fromSchema(schema)));
    assertEquals(rdd.collect(), DataFrames.toRecords(DataFrames.toDataFrame(schema, rdd)).getSecond().collect());

    Dataset<Row> dataFrame = DataFrames.toDataFrame(schema, rdd);
    dataFrame.show();
    Column mean = DataFrames.mean(dataFrame, "0");
    Column std = DataFrames.std(dataFrame, "0");
    dataFrame.withColumn("0", dataFrame.col("0").minus(mean)).show();
    dataFrame.withColumn("0", dataFrame.col("0").divide(std)).show();

    /*   DataFrame desc = dataFrame.describe(dataFrame.columns());
    dataFrame.show();
    System.out.println(dataFrame.agg(avg("0"), dataFrame.col("0")));
    dataFrame.withColumn("0",dataFrame.col("0").minus(avg(dataFrame.col("0"))));
    dataFrame.show();
    
    
    for(String column : dataFrame.columns()) {
        System.out.println(DataFrames.mean(desc,column));
        System.out.println(DataFrames.min(desc,column));
        System.out.println(DataFrames.max(desc,column));
        System.out.println(DataFrames.std(desc,column));
    
    }*/
}