Java Code Examples for org.datavec.api.transform.TransformProcess#getFinalSchema()

The following examples show how to use org.datavec.api.transform.TransformProcess#getFinalSchema(). Each example is taken from the open source project and source file named in its header.
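Before the project examples, here is a minimal sketch of the method itself: build a TransformProcess from an input Schema, then call getFinalSchema() to inspect the columns the process will emit. The column names and the removeColumns step below are illustrative choices, not taken from any of the projects that follow.

import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.schema.Schema;

public class GetFinalSchemaSketch {
    public static void main(String[] args) {
        // Input schema: one String column and one Double column (arbitrary example columns)
        Schema inputSchema = new Schema.Builder()
                .addColumnString("id")
                .addColumnDouble("value")
                .build();

        // A trivial process that drops the "id" column
        TransformProcess tp = new TransformProcess.Builder(inputSchema)
                .removeColumns("id")
                .build();

        // getFinalSchema() returns the schema of the data after all transform steps are applied
        Schema outputSchema = tp.getFinalSchema();
        System.out.println(outputSchema.getColumnNames()); // prints [value]
    }
}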
Example 1
Source File: BaseJsonArrayConverter.java    From konduit-serving with Apache License 2.0
protected Pair<Map<Integer, Integer>, List<? extends Map<FieldName, ?>>> doTransformProcessConvertPmmlWithErrors(Schema schema, JsonArray jsonArray, TransformProcess transformProcess, DataPipelineErrorHandler dataPipelineErrorHandler) {
    Schema outputSchema = transformProcess.getFinalSchema();

    if (!transformProcess.getInitialSchema().equals(schema)) {
        throw new IllegalArgumentException("Transform process specified, but does not match target input inputSchema");
    }


    List<Map<FieldName, Object>> ret = new ArrayList<>(jsonArray.size());
    List<FieldName> fieldNames = getNameRepresentationFor(outputSchema);

    Pair<Map<Integer, Integer>, ArrowWritableRecordBatch> convertWithErrors = convertWithErrors(schema, jsonArray, transformProcess, dataPipelineErrorHandler);
    ArrowWritableRecordBatch conversion = convertWithErrors.getRight();
    for (int i = 0; i < conversion.size(); i++) {
        List<Writable> recordToMap = conversion.get(i);
        Map<FieldName, Object> record = new LinkedHashMap<>();
        for (int j = 0; j < outputSchema.numColumns(); j++) {
            record.put(fieldNames.get(j), WritableValueRetriever.getUnderlyingValue(recordToMap.get(j)));
        }

        ret.add(record);
    }

    return Pair.of(convertWithErrors.getKey(), ret);
}
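In this converter, getFinalSchema() supplies the layout of the transformed records: the FieldName list and the per-record column count both come from the output schema, while getInitialSchema() is used only to validate the caller-supplied input schema.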
 
Example 2
Source File: LocalTransformExecutor.java    From DataVec with Apache License 2.0
/**
 * Execute the specified TransformProcess with the given input data<br>
 * Note: this method can only be used if the TransformProcess returns non-sequence data. For TransformProcesses
 * that return a sequence, use {@link #executeToSequence(List, TransformProcess)}
 *
 * @param inputWritables   Input data to process
 * @param transformProcess TransformProcess to execute
 * @return Processed data
 */
public static List<List<Writable>> execute(List<List<Writable>> inputWritables,
                                           TransformProcess transformProcess) {
    if (transformProcess.getFinalSchema() instanceof SequenceSchema) {
        throw new IllegalStateException("Cannot return sequence data with this method");
    }

    List<List<Writable>> filteredSequence = inputWritables.parallelStream()
            .filter(input -> input.size() == transformProcess.getInitialSchema().numColumns()).collect(toList());
    if(filteredSequence.size() != inputWritables.size()) {
        log.warn("Filtered out " + (inputWritables.size() - filteredSequence.size()) + " values");
    }
    return execute(filteredSequence, null, transformProcess).getFirst();
}
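A minimal call site for this method could look like the sketch below. The schema, transform step, and data are placeholders for illustration (mirroring the test in Example 4), and the import path for LocalTransformExecutor is assumed to be the usual datavec-local one.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.schema.Schema;
import org.datavec.api.writable.DoubleWritable;
import org.datavec.api.writable.Text;
import org.datavec.api.writable.Writable;
import org.datavec.local.transforms.LocalTransformExecutor;

public class LocalExecuteSketch {
    public static void main(String[] args) {
        Schema schema = new Schema.Builder()
                .addColumnString("label")
                .addColumnDouble("value")
                .build();

        // Any TransformProcess with a non-sequence final schema works here; this one drops a column
        TransformProcess tp = new TransformProcess.Builder(schema)
                .removeColumns("label")
                .build();

        List<List<Writable>> input = new ArrayList<>();
        input.add(Arrays.asList((Writable) new Text("a"), new DoubleWritable(1.0)));
        input.add(Arrays.asList((Writable) new Text("b"), new DoubleWritable(2.0)));

        // Rows whose size does not match the initial schema are filtered out with a warning
        List<List<Writable>> output = LocalTransformExecutor.execute(input, tp);
        System.out.println(output); // two single-column rows containing 1.0 and 2.0
    }
}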
 
Example 3
Source File: SparkTransformExecutor.java    From deeplearning4j with Apache License 2.0
/**
 * Execute the specified TransformProcess with the given <i>sequence</i> input data<br>
 * Note: this method can only be used if the TransformProcess starts with sequence data, and also returns sequence data
 *
 * @param inputSequence    Input sequence data to process
 * @param transformProcess TransformProcess to execute
 * @return Processed (sequence) data
 */
public static JavaRDD<List<List<Writable>>> executeSequenceToSequence(JavaRDD<List<List<Writable>>> inputSequence,
                TransformProcess transformProcess) {
    if (!(transformProcess.getFinalSchema() instanceof SequenceSchema)) {
        throw new IllegalStateException("Cannot return non-sequence data with this method");
    }

    return execute(null, inputSequence, transformProcess).getSecond();
}
 
Example 4
Source File: TestCalculateSortedRank.java    From DataVec with Apache License 2.0
@Test
public void testCalculateSortedRank() {

    List<List<Writable>> data = new ArrayList<>();
    data.add(Arrays.asList((Writable) new Text("0"), new DoubleWritable(0.0)));
    data.add(Arrays.asList((Writable) new Text("3"), new DoubleWritable(0.3)));
    data.add(Arrays.asList((Writable) new Text("2"), new DoubleWritable(0.2)));
    data.add(Arrays.asList((Writable) new Text("1"), new DoubleWritable(0.1)));

    List<List<Writable>> rdd = data;

    Schema schema = new Schema.Builder().addColumnsString("TextCol").addColumnDouble("DoubleCol").build();

    TransformProcess tp = new TransformProcess.Builder(schema)
                    .calculateSortedRank("rank", "DoubleCol", new DoubleWritableComparator()).build();

    Schema outSchema = tp.getFinalSchema();
    assertEquals(3, outSchema.numColumns());
    assertEquals(Arrays.asList("TextCol", "DoubleCol", "rank"), outSchema.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.String, ColumnType.Double, ColumnType.Long), outSchema.getColumnTypes());

    List<List<Writable>> out = LocalTransformExecutor.execute(rdd, tp);

    List<List<Writable>> collected = out;
    assertEquals(4, collected.size());
    for (int i = 0; i < 4; i++)
        assertEquals(3, collected.get(i).size());

    for (List<Writable> example : collected) {
        int exampleNum = example.get(0).toInt();
        int rank = example.get(2).toInt();
        assertEquals(exampleNum, rank);
    }
}
 
Example 5
Source File: TestCalculateSortedRank.java    From deeplearning4j with Apache License 2.0
@Test
public void testCalculateSortedRank() {

    List<List<Writable>> data = new ArrayList<>();
    data.add(Arrays.asList((Writable) new Text("0"), new DoubleWritable(0.0)));
    data.add(Arrays.asList((Writable) new Text("3"), new DoubleWritable(0.3)));
    data.add(Arrays.asList((Writable) new Text("2"), new DoubleWritable(0.2)));
    data.add(Arrays.asList((Writable) new Text("1"), new DoubleWritable(0.1)));

    JavaRDD<List<Writable>> rdd = sc.parallelize(data);

    Schema schema = new Schema.Builder().addColumnsString("TextCol").addColumnDouble("DoubleCol").build();

    TransformProcess tp = new TransformProcess.Builder(schema)
                    .calculateSortedRank("rank", "DoubleCol", new DoubleWritableComparator()).build();

    Schema outSchema = tp.getFinalSchema();
    assertEquals(3, outSchema.numColumns());
    assertEquals(Arrays.asList("TextCol", "DoubleCol", "rank"), outSchema.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.String, ColumnType.Double, ColumnType.Long), outSchema.getColumnTypes());

    JavaRDD<List<Writable>> out = SparkTransformExecutor.execute(rdd, tp);

    List<List<Writable>> collected = out.collect();
    assertEquals(4, collected.size());
    for (int i = 0; i < 4; i++)
        assertEquals(3, collected.get(i).size());

    for (List<Writable> example : collected) {
        int exampleNum = example.get(0).toInt();
        int rank = example.get(2).toInt();
        assertEquals(exampleNum, rank);
    }
}
 
Example 6
Source File: TestCalculateSortedRank.java    From deeplearning4j with Apache License 2.0
@Test
public void testCalculateSortedRank() {

    List<List<Writable>> data = new ArrayList<>();
    data.add(Arrays.asList((Writable) new Text("0"), new DoubleWritable(0.0)));
    data.add(Arrays.asList((Writable) new Text("3"), new DoubleWritable(0.3)));
    data.add(Arrays.asList((Writable) new Text("2"), new DoubleWritable(0.2)));
    data.add(Arrays.asList((Writable) new Text("1"), new DoubleWritable(0.1)));

    List<List<Writable>> rdd = data;

    Schema schema = new Schema.Builder().addColumnsString("TextCol").addColumnDouble("DoubleCol").build();

    TransformProcess tp = new TransformProcess.Builder(schema)
                    .calculateSortedRank("rank", "DoubleCol", new DoubleWritableComparator()).build();

    Schema outSchema = tp.getFinalSchema();
    assertEquals(3, outSchema.numColumns());
    assertEquals(Arrays.asList("TextCol", "DoubleCol", "rank"), outSchema.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.String, ColumnType.Double, ColumnType.Long), outSchema.getColumnTypes());

    List<List<Writable>> out = LocalTransformExecutor.execute(rdd, tp);

    List<List<Writable>> collected = out;
    assertEquals(4, collected.size());
    for (int i = 0; i < 4; i++)
        assertEquals(3, collected.get(i).size());

    for (List<Writable> example : collected) {
        int exampleNum = example.get(0).toInt();
        int rank = example.get(2).toInt();
        assertEquals(exampleNum, rank);
    }
}
 
Example 7
Source File: LocalTransformExecutor.java    From deeplearning4j with Apache License 2.0
/**
 * Execute the specified TransformProcess with the given <i>sequence</i> input data<br>
 * Note: this method is intended for TransformProcesses that start with sequence data and also return sequence data.
 * If the final schema is not a sequence, each input sequence is executed independently as non-sequence data instead.
 *
 * @param inputSequence    Input sequence data to process
 * @param transformProcess TransformProcess to execute
 * @return Processed (sequence) data
 */
public static List<List<List<Writable>>> executeSequenceToSequence(List<List<List<Writable>>> inputSequence,
                                                                   TransformProcess transformProcess) {
    if (!(transformProcess.getFinalSchema() instanceof SequenceSchema)) {
        List<List<List<Writable>>> ret = new ArrayList<>(inputSequence.size());
        for(List<List<Writable>> timeStep : inputSequence) {
            ret.add(execute(timeStep,null, transformProcess).getFirst());
        }

        return ret;
    }

    return execute(null, inputSequence, transformProcess).getSecond();
}
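Unlike the Spark versions in Examples 3 and 8, this local implementation does not throw when the final schema is not a SequenceSchema; it falls back to executing the process on each input sequence independently as non-sequence data and returns the per-sequence results.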
 
Example 8
Source File: SparkTransformExecutor.java    From DataVec with Apache License 2.0
/**
 * Execute the specified TransformProcess with the given <i>sequence</i> input data<br>
 * Note: this method can only be used if the TransformProcess starts with sequence data, and also returns sequence data
 *
 * @param inputSequence    Input sequence data to process
 * @param transformProcess TransformProcess to execute
 * @return Processed (non-sequential) data
 */
public static JavaRDD<List<List<Writable>>> executeSequenceToSequence(JavaRDD<List<List<Writable>>> inputSequence,
                TransformProcess transformProcess) {
    if (!(transformProcess.getFinalSchema() instanceof SequenceSchema)) {
        throw new IllegalStateException("Cannot return non-sequence data with this method");
    }

    return execute(null, inputSequence, transformProcess).getSecond();
}
 
Example 9
Source File: TestCalculateSortedRank.java    From DataVec with Apache License 2.0
@Test
public void testCalculateSortedRank() {

    List<List<Writable>> data = new ArrayList<>();
    data.add(Arrays.asList((Writable) new Text("0"), new DoubleWritable(0.0)));
    data.add(Arrays.asList((Writable) new Text("3"), new DoubleWritable(0.3)));
    data.add(Arrays.asList((Writable) new Text("2"), new DoubleWritable(0.2)));
    data.add(Arrays.asList((Writable) new Text("1"), new DoubleWritable(0.1)));

    JavaRDD<List<Writable>> rdd = sc.parallelize(data);

    Schema schema = new Schema.Builder().addColumnsString("TextCol").addColumnDouble("DoubleCol").build();

    TransformProcess tp = new TransformProcess.Builder(schema)
                    .calculateSortedRank("rank", "DoubleCol", new DoubleWritableComparator()).build();

    Schema outSchema = tp.getFinalSchema();
    assertEquals(3, outSchema.numColumns());
    assertEquals(Arrays.asList("TextCol", "DoubleCol", "rank"), outSchema.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.String, ColumnType.Double, ColumnType.Long), outSchema.getColumnTypes());

    JavaRDD<List<Writable>> out = SparkTransformExecutor.execute(rdd, tp);

    List<List<Writable>> collected = out.collect();
    assertEquals(4, collected.size());
    for (int i = 0; i < 4; i++)
        assertEquals(3, collected.get(i).size());

    for (List<Writable> example : collected) {
        int exampleNum = example.get(0).toInt();
        int rank = example.get(2).toInt();
        assertEquals(exampleNum, rank);
    }
}
 
Example 10
Source File: LocalTransformExecutor.java    From deeplearning4j with Apache License 2.0
/**
 * Execute the specified TransformProcess with the given input data<br>
 * Note: this method can only be used if the TransformProcess returns non-sequence data. For TransformProcesses
 * that return a sequence, use {@link #executeToSequence(List, TransformProcess)}
 *
 * @param inputWritables   Input data to process
 * @param transformProcess TransformProcess to execute
 * @return Processed data
 */
public static List<List<Writable>> execute(List<List<Writable>> inputWritables,
                                           TransformProcess transformProcess) {
    if (transformProcess.getFinalSchema() instanceof SequenceSchema) {
        throw new IllegalStateException("Cannot return sequence data with this method");
    }

    List<List<Writable>> filteredSequence = inputWritables.parallelStream()
            .filter(input -> input.size() == transformProcess.getInitialSchema().numColumns()).collect(toList());
    if(filteredSequence.size() != inputWritables.size()) {
        log.warn("Filtered out " + (inputWritables.size() - filteredSequence.size()) + " values");
    }
    return execute(filteredSequence, null, transformProcess).getFirst();
}
 
Example 11
Source File: SparkTransformExecutor.java    From deeplearning4j with Apache License 2.0
/**
 * Execute the specified TransformProcess with the given <i>sequence</i> input data<br>
 * Note: this method can only be used if the TransformProcess starts with sequence data, but returns <i>non-sequential</i>
 * data (after reducing or converting sequential data to individual examples)
 *
 * @param inputSequence    Input sequence data to process
 * @param transformProcess TransformProcess to execute
 * @return Processed (non-sequential) data
 */
public static JavaRDD<List<Writable>> executeSequenceToSeparate(JavaRDD<List<List<Writable>>> inputSequence,
                TransformProcess transformProcess) {
    if (transformProcess.getFinalSchema() instanceof SequenceSchema) {
        throw new IllegalStateException("Cannot return sequence data with this method");
    }

    return execute(null, inputSequence, transformProcess).getFirst();
}
 
Example 12
Source File: SparkTransformExecutor.java    From deeplearning4j with Apache License 2.0
/**
 * Execute the specified TransformProcess with the given input data<br>
 * Note: this method can only be used if the TransformProcess
 * starts with non-sequential data,
 * but returns <i>sequence</i>
 * data (after grouping or converting to a sequence as one of the steps)
 *
 * @param inputWritables   Input data to process
 * @param transformProcess TransformProcess to execute
 * @return Processed (sequence) data
 */
public static JavaRDD<List<List<Writable>>> executeToSequence(JavaRDD<List<Writable>> inputWritables,
                TransformProcess transformProcess) {
    if (!(transformProcess.getFinalSchema() instanceof SequenceSchema)) {
        throw new IllegalStateException("Cannot return non-sequence data with this method");
    }

    return execute(inputWritables, null, transformProcess).getSecond();
}
 
Example 13
Source File: SparkTransformExecutor.java    From deeplearning4j with Apache License 2.0
/**
 * Execute the specified TransformProcess with the given input data<br>
 * Note: this method can only be used if the TransformProcess returns non-sequence data. For TransformProcesses
 * that return a sequence, use {@link #executeToSequence(JavaRDD, TransformProcess)}
 *
 * @param inputWritables   Input data to process
 * @param transformProcess TransformProcess to execute
 * @return Processed data
 */
public static JavaRDD<List<Writable>> execute(JavaRDD<List<Writable>> inputWritables,
                TransformProcess transformProcess) {
    if (transformProcess.getFinalSchema() instanceof SequenceSchema) {
        throw new IllegalStateException("Cannot return sequence data with this method");
    }

    return execute(inputWritables, null, transformProcess).getFirst();
}
 
Example 14
Source File: LocalTransformExecutor.java    From deeplearning4j with Apache License 2.0
/**
 * Execute the specified TransformProcess with the given <i>sequence</i> input data<br>
 * Note: this method can only be used if the TransformProcess starts with sequence data, but returns <i>non-sequential</i>
 * data (after reducing or converting sequential data to individual examples)
 *
 * @param inputSequence    Input sequence data to process
 * @param transformProcess TransformProcess to execute
 * @return Processed (non-sequential) data
 */
public static List<List<Writable>> executeSequenceToSeparate(List<List<List<Writable>>> inputSequence,
                                                             TransformProcess transformProcess) {
    if (transformProcess.getFinalSchema() instanceof SequenceSchema) {
        throw new IllegalStateException("Cannot return sequence data with this method");
    }

    return execute(null, inputSequence, transformProcess).getFirst();
}
 
Example 15
Source File: LocalTransformExecutor.java    From deeplearning4j with Apache License 2.0
/**
 * Execute the specified TransformProcess with the given input data<br>
 * Note: this method can only be used if the TransformProcess
 * starts with non-sequential data,
 * but returns <i>sequence</i>
 * data (after grouping or converting to a sequence as one of the steps)
 *
 * @param inputWritables   Input data to process
 * @param transformProcess TransformProcess to execute
 * @return Processed (sequence) data
 */
public static List<List<List<Writable>>> executeToSequence(List<List<Writable>> inputWritables,
                                                           TransformProcess transformProcess) {
    if (!(transformProcess.getFinalSchema() instanceof SequenceSchema)) {
        throw new IllegalStateException("Cannot return non-sequence data with this method");
    }

    return execute(inputWritables, null, transformProcess).getSecond();
}
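For the SequenceSchema check above to pass, the TransformProcess has to end in sequence form. One way to get there is the convertToSequence(keyColumn, comparator) builder step, sketched below; the column names and data are illustrative, and convertToSequence plus NumericalColumnComparator are assumptions drawn from the DataVec transform API rather than shown in the examples on this page.

import java.util.Arrays;
import java.util.List;

import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.schema.Schema;
import org.datavec.api.transform.sequence.comparator.NumericalColumnComparator;
import org.datavec.api.writable.DoubleWritable;
import org.datavec.api.writable.IntWritable;
import org.datavec.api.writable.Text;
import org.datavec.api.writable.Writable;
import org.datavec.local.transforms.LocalTransformExecutor;

public class ExecuteToSequenceSketch {
    public static void main(String[] args) {
        Schema schema = new Schema.Builder()
                .addColumnString("deviceId")
                .addColumnInteger("timeStep")
                .addColumnDouble("value")
                .build();

        // Group rows into sequences keyed by "deviceId", ordered within each sequence by "timeStep".
        // After this step getFinalSchema() returns a SequenceSchema, so executeToSequence accepts it.
        TransformProcess tp = new TransformProcess.Builder(schema)
                .convertToSequence("deviceId", new NumericalColumnComparator("timeStep"))
                .build();

        List<List<Writable>> rows = Arrays.asList(
                Arrays.asList((Writable) new Text("a"), new IntWritable(1), new DoubleWritable(0.1)),
                Arrays.asList((Writable) new Text("a"), new IntWritable(0), new DoubleWritable(0.0)),
                Arrays.asList((Writable) new Text("b"), new IntWritable(0), new DoubleWritable(0.5)));

        List<List<List<Writable>>> sequences = LocalTransformExecutor.executeToSequence(rows, tp);
        System.out.println(sequences.size()); // expected: 2 sequences, one per deviceId
    }
}

Whether the local executor supports every sequence-creating operation is not established by this page; the sketch only illustrates how a sequence-final schema satisfies the check in the method above.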
 
Example 16
Source File: SparkTransformExecutor.java    From DataVec with Apache License 2.0
/**
 * Execute the specified TransformProcess with the given <i>sequence</i> input data<br>
 * Note: this method can only be used if the TransformProcess starts with sequence data, but returns <i>non-sequential</i>
 * data (after reducing or converting sequential data to individual examples)
 *
 * @param inputSequence    Input sequence data to process
 * @param transformProcess TransformProcess to execute
 * @return Processed (non-sequential) data
 */
public static JavaRDD<List<Writable>> executeSequenceToSeparate(JavaRDD<List<List<Writable>>> inputSequence,
                TransformProcess transformProcess) {
    if (transformProcess.getFinalSchema() instanceof SequenceSchema) {
        throw new IllegalStateException("Cannot return sequence data with this method");
    }

    return execute(null, inputSequence, transformProcess).getFirst();
}
 
Example 17
Source File: SparkTransformExecutor.java    From DataVec with Apache License 2.0
/**
 * Execute the specified TransformProcess with the given input data<br>
 * Note: this method can only be used if the TransformProcess
 * starts with non-sequential data,
 * but returns <i>sequence</i>
 * data (after grouping or converting to a sequence as one of the steps)
 *
 * @param inputWritables   Input data to process
 * @param transformProcess TransformProcess to execute
 * @return Processed (sequence) data
 */
public static JavaRDD<List<List<Writable>>> executeToSequence(JavaRDD<List<Writable>> inputWritables,
                TransformProcess transformProcess) {
    if (!(transformProcess.getFinalSchema() instanceof SequenceSchema)) {
        throw new IllegalStateException("Cannot return non-sequence data with this method");
    }

    return execute(inputWritables, null, transformProcess).getSecond();
}
 
Example 18
Source File: LocalTransformExecutor.java    From DataVec with Apache License 2.0
/**
 * Execute the specified TransformProcess with the given <i>sequence</i> input data<br>
 * Note: this method can only be used if the TransformProcess starts with sequence data, but returns <i>non-sequential</i>
 * data (after reducing or converting sequential data to individual examples)
 *
 * @param inputSequence    Input sequence data to process
 * @param transformProcess TransformProcess to execute
 * @return Processed (non-sequential) data
 */
public static List<List<Writable>> executeSequenceToSeparate(List<List<List<Writable>>> inputSequence,
                                                             TransformProcess transformProcess) {
    if (transformProcess.getFinalSchema() instanceof SequenceSchema) {
        throw new IllegalStateException("Cannot return sequence data with this method");
    }

    return execute(null, inputSequence, transformProcess).getFirst();
}
 
Example 19
Source File: LocalTransformExecutor.java    From DataVec with Apache License 2.0
/**
 * Execute the specified TransformProcess with the given input data<br>
 * Note: this method can only be used if the TransformProcess
 * starts with non-sequential data,
 * but returns <i>sequence</i>
 * data (after grouping or converting to a sequence as one of the steps)
 *
 * @param inputWritables   Input data to process
 * @param transformProcess TransformProcess to execute
 * @return Processed (sequence) data
 */
public static List<List<List<Writable>>> executeToSequence(List<List<Writable>> inputWritables,
                                                           TransformProcess transformProcess) {
    if (!(transformProcess.getFinalSchema() instanceof SequenceSchema)) {
        throw new IllegalStateException("Cannot return non-sequence data with this method");
    }

    return execute(inputWritables, null, transformProcess).getSecond();
}
 
Example 20
Source File: BaseJsonArrayConverter.java    From konduit-serving with Apache License 2.0
protected List<Map<FieldName, Object>> doTransformProcessConvertPmml(Schema schema, JsonArray jsonArray, TransformProcess transformProcess) {
    Schema outputSchema = transformProcess.getFinalSchema();

    if (!transformProcess.getInitialSchema().equals(schema)) {
        throw new IllegalArgumentException("Transform process specified, but does not match target input inputSchema");
    }


    List<Map<FieldName, Object>> ret = new ArrayList<>(jsonArray.size());
    List<FieldName> fieldNames = getNameRepresentationFor(outputSchema);

    ArrowWritableRecordBatch conversion = convert(schema, jsonArray, transformProcess);
    for (int i = 0; i < conversion.size(); i++) {
        List<Writable> recordToMap = conversion.get(i);
        Map<FieldName, Object> record = new LinkedHashMap<>();
        for (int j = 0; j < outputSchema.numColumns(); j++) {
            record.put(fieldNames.get(j), WritableValueRetriever.getUnderlyingValue(recordToMap.get(j)));
        }

        ret.add(record);
    }

    return ret;
}