org.datavec.spark.transform.SparkTransformExecutor Java Examples

The following examples show how to use org.datavec.spark.transform.SparkTransformExecutor. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SparkTransformFunction.java    From DataVec with Apache License 2.0 5 votes vote down vote up
@Override
public List<Writable> call(List<Writable> v1) throws Exception {
    // In try/catch mode a failing record is logged and skipped (empty list
    // returned) instead of failing the whole Spark job.
    if (SparkTransformExecutor.isTryCatch()) {
        try {
            return transform.map(v1);
        } catch (Exception e) {
            // Pass the exception as the final argument so SLF4J logs the full
            // stack trace; concatenating it into the message loses that detail.
            log.warn("Error occurred on record {}", v1, e);
            return new ArrayList<>();
        }
    }
    return transform.map(v1);
}
 
Example #2
Source File: TestConvertToSequence.java    From DataVec with Apache License 2.0 5 votes vote down vote up
@Test
public void testConvertToSequenceLength1(){

    // Two-column schema: a String key column and a Long value column.
    Schema schema = new Schema.Builder()
            .addColumnsString("string")
            .addColumnLong("long")
            .build();

    List<List<Writable>> records = Arrays.asList(
            Arrays.<Writable>asList(new Text("a"), new LongWritable(0)),
            Arrays.<Writable>asList(new Text("b"), new LongWritable(1)),
            Arrays.<Writable>asList(new Text("c"), new LongWritable(2)));

    // convertToSequence() with no key turns each record into its own sequence.
    TransformProcess process = new TransformProcess.Builder(schema)
            .convertToSequence()
            .build();

    JavaRDD<List<Writable>> input = sc.parallelize(records);

    List<List<List<Writable>>> sequences =
            SparkTransformExecutor.executeToSequence(input, process).collect();

    assertEquals(3, sequences.size());

    // Every input record must have become a length-1 sequence in the output.
    for (List<Writable> record : records) {
        assertTrue(sequences.contains(Collections.singletonList(record)));
    }
}
 
Example #3
Source File: TestCalculateSortedRank.java    From DataVec with Apache License 2.0 5 votes vote down vote up
@Test
public void testCalculateSortedRank() {

    // Text column values are chosen to equal each row's expected rank after
    // sorting by the double column.
    List<List<Writable>> input = new ArrayList<>();
    input.add(Arrays.asList((Writable) new Text("0"), new DoubleWritable(0.0)));
    input.add(Arrays.asList((Writable) new Text("3"), new DoubleWritable(0.3)));
    input.add(Arrays.asList((Writable) new Text("2"), new DoubleWritable(0.2)));
    input.add(Arrays.asList((Writable) new Text("1"), new DoubleWritable(0.1)));

    Schema inputSchema = new Schema.Builder()
            .addColumnsString("TextCol")
            .addColumnDouble("DoubleCol")
            .build();

    TransformProcess process = new TransformProcess.Builder(inputSchema)
            .calculateSortedRank("rank", "DoubleCol", new DoubleWritableComparator())
            .build();

    // calculateSortedRank appends a single Long "rank" column to the schema.
    Schema resultSchema = process.getFinalSchema();
    assertEquals(3, resultSchema.numColumns());
    assertEquals(Arrays.asList("TextCol", "DoubleCol", "rank"), resultSchema.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.String, ColumnType.Double, ColumnType.Long),
            resultSchema.getColumnTypes());

    List<List<Writable>> result =
            SparkTransformExecutor.execute(sc.parallelize(input), process).collect();

    assertEquals(4, result.size());
    for (List<Writable> row : result) {
        assertEquals(3, row.size());
        // The rank column (index 2) must match the value encoded in TextCol.
        assertEquals(row.get(0).toInt(), row.get(2).toInt());
    }
}
 
Example #4
Source File: SparkTransformFunction.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
@Override
public List<Writable> call(List<Writable> v1) throws Exception {
    // In try/catch mode a failing record is logged and skipped (empty list
    // returned) instead of failing the whole Spark job.
    if (SparkTransformExecutor.isTryCatch()) {
        try {
            return transform.map(v1);
        } catch (Exception e) {
            // Pass the exception as the final argument so SLF4J logs the full
            // stack trace; concatenating it into the message loses that detail.
            log.warn("Error occurred on record {}", v1, e);
            return new ArrayList<>();
        }
    }
    return transform.map(v1);
}
 
Example #5
Source File: TestConvertToSequence.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
@Test
public void testConvertToSequenceLength1(){

    // Two-column schema: a String key column and a Long value column.
    Schema schema = new Schema.Builder()
            .addColumnsString("string")
            .addColumnLong("long")
            .build();

    List<List<Writable>> records = Arrays.asList(
            Arrays.<Writable>asList(new Text("a"), new LongWritable(0)),
            Arrays.<Writable>asList(new Text("b"), new LongWritable(1)),
            Arrays.<Writable>asList(new Text("c"), new LongWritable(2)));

    // convertToSequence() with no key turns each record into its own sequence.
    TransformProcess process = new TransformProcess.Builder(schema)
            .convertToSequence()
            .build();

    JavaRDD<List<Writable>> input = sc.parallelize(records);

    List<List<List<Writable>>> sequences =
            SparkTransformExecutor.executeToSequence(input, process).collect();

    assertEquals(3, sequences.size());

    // Every input record must have become a length-1 sequence in the output.
    for (List<Writable> record : records) {
        assertTrue(sequences.contains(Collections.singletonList(record)));
    }
}
 
Example #6
Source File: TestCalculateSortedRank.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
@Test
public void testCalculateSortedRank() {

    // Text column values are chosen to equal each row's expected rank after
    // sorting by the double column.
    List<List<Writable>> input = new ArrayList<>();
    input.add(Arrays.asList((Writable) new Text("0"), new DoubleWritable(0.0)));
    input.add(Arrays.asList((Writable) new Text("3"), new DoubleWritable(0.3)));
    input.add(Arrays.asList((Writable) new Text("2"), new DoubleWritable(0.2)));
    input.add(Arrays.asList((Writable) new Text("1"), new DoubleWritable(0.1)));

    Schema inputSchema = new Schema.Builder()
            .addColumnsString("TextCol")
            .addColumnDouble("DoubleCol")
            .build();

    TransformProcess process = new TransformProcess.Builder(inputSchema)
            .calculateSortedRank("rank", "DoubleCol", new DoubleWritableComparator())
            .build();

    // calculateSortedRank appends a single Long "rank" column to the schema.
    Schema resultSchema = process.getFinalSchema();
    assertEquals(3, resultSchema.numColumns());
    assertEquals(Arrays.asList("TextCol", "DoubleCol", "rank"), resultSchema.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.String, ColumnType.Double, ColumnType.Long),
            resultSchema.getColumnTypes());

    List<List<Writable>> result =
            SparkTransformExecutor.execute(sc.parallelize(input), process).collect();

    assertEquals(4, result.size());
    for (List<Writable> row : result) {
        assertEquals(3, row.size());
        // The rank column (index 2) must match the value encoded in TextCol.
        assertEquals(row.get(0).toInt(), row.get(2).toInt());
    }
}
 
Example #7
Source File: TestConvertToSequence.java    From DataVec with Apache License 2.0 4 votes vote down vote up
@Test
public void testConvertToSequenceCompoundKey() {

    Schema schema = new Schema.Builder()
            .addColumnsString("key1", "key2")
            .addColumnLong("time")
            .build();

    List<List<Writable>> records = Arrays.asList(
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(10)),
            Arrays.<Writable>asList(new Text("k1b"), new Text("k2b"), new LongWritable(10)),
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(-10)),
            Arrays.<Writable>asList(new Text("k1b"), new Text("k2b"), new LongWritable(5)),
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(0)));

    // Group records by the compound key (key1, key2) and order each resulting
    // sequence by the "time" column.
    TransformProcess process = new TransformProcess.Builder(schema)
            .convertToSequence(Arrays.asList("key1", "key2"), new NumericalColumnComparator("time"))
            .build();

    List<List<List<Writable>>> sequences =
            SparkTransformExecutor.executeToSequence(sc.parallelize(records), process).collect();

    assertEquals(2, sequences.size());

    // Spark gives no ordering guarantee across sequences; identify the two
    // groups by their lengths (key "a" has 3 records, key "b" has 2).
    List<List<Writable>> keyASeq;
    List<List<Writable>> keyBSeq;
    if (sequences.get(0).size() == 3) {
        keyASeq = sequences.get(0);
        keyBSeq = sequences.get(1);
    } else {
        keyASeq = sequences.get(1);
        keyBSeq = sequences.get(0);
    }

    List<List<Writable>> expectedA = Arrays.asList(
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(-10)),
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(0)),
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(10)));

    List<List<Writable>> expectedB = Arrays.asList(
            Arrays.<Writable>asList(new Text("k1b"), new Text("k2b"), new LongWritable(5)),
            Arrays.<Writable>asList(new Text("k1b"), new Text("k2b"), new LongWritable(10)));

    assertEquals(expectedA, keyASeq);
    assertEquals(expectedB, keyBSeq);
}
 
Example #8
Source File: TestConvertToSequence.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
public void testConvertToSequenceCompoundKey() {

    Schema schema = new Schema.Builder()
            .addColumnsString("key1", "key2")
            .addColumnLong("time")
            .build();

    List<List<Writable>> records = Arrays.asList(
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(10)),
            Arrays.<Writable>asList(new Text("k1b"), new Text("k2b"), new LongWritable(10)),
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(-10)),
            Arrays.<Writable>asList(new Text("k1b"), new Text("k2b"), new LongWritable(5)),
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(0)));

    // Group records by the compound key (key1, key2) and order each resulting
    // sequence by the "time" column.
    TransformProcess process = new TransformProcess.Builder(schema)
            .convertToSequence(Arrays.asList("key1", "key2"), new NumericalColumnComparator("time"))
            .build();

    List<List<List<Writable>>> sequences =
            SparkTransformExecutor.executeToSequence(sc.parallelize(records), process).collect();

    assertEquals(2, sequences.size());

    // Spark gives no ordering guarantee across sequences; identify the two
    // groups by their lengths (key "a" has 3 records, key "b" has 2).
    List<List<Writable>> keyASeq;
    List<List<Writable>> keyBSeq;
    if (sequences.get(0).size() == 3) {
        keyASeq = sequences.get(0);
        keyBSeq = sequences.get(1);
    } else {
        keyASeq = sequences.get(1);
        keyBSeq = sequences.get(0);
    }

    List<List<Writable>> expectedA = Arrays.asList(
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(-10)),
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(0)),
            Arrays.<Writable>asList(new Text("k1a"), new Text("k2a"), new LongWritable(10)));

    List<List<Writable>> expectedB = Arrays.asList(
            Arrays.<Writable>asList(new Text("k1b"), new Text("k2b"), new LongWritable(5)),
            Arrays.<Writable>asList(new Text("k1b"), new Text("k2b"), new LongWritable(10)));

    assertEquals(expectedA, keyASeq);
    assertEquals(expectedB, keyBSeq);
}