Java Code Examples for org.apache.spark.api.java.JavaRDD#toRDD()

The following examples show how to use org.apache.spark.api.java.JavaRDD#toRDD(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You can also check out the related API usage on the sidebar.
Example 1
Source File: GraphXGraphGenerator.java    From rya with Apache License 2.0 8 votes vote down vote up
/**
 * Assembles a GraphX {@link Graph} from the vertex and edge RDDs produced by
 * {@code getVertexRDD} and {@code getEdgeRDD} for the given context and configuration.
 *
 * <p>The edge RDD arrives as tuples; only the second element of each tuple (the edge itself)
 * is kept before the graph is built.
 *
 * @param sc   Spark context handed to the vertex and edge RDD loaders
 * @param conf configuration handed to the vertex and edge RDD loaders
 * @return a graph whose vertex and edge attributes are both {@code RyaTypeWritable}
 * @throws IOException              declared by this method; raised by the loaders it calls
 * @throws AccumuloSecurityException declared by this method; raised by the loaders it calls
 */
public Graph<RyaTypeWritable, RyaTypeWritable> createGraph(SparkContext sc, Configuration conf) throws IOException, AccumuloSecurityException{
    // The same class tag serves for both the vertex and the edge attribute type.
    ClassTag<RyaTypeWritable> attributeTag = ClassTag$.MODULE$.apply(RyaTypeWritable.class);

    // Load vertices first, then edges, matching the original call order.
    RDD<Tuple2<Object, RyaTypeWritable>> vertices = getVertexRDD(sc, conf);
    RDD<Tuple2<Object, Edge>> keyedEdges = getEdgeRDD(sc, conf);

    // Strip the tuple wrapper: only the edge in position _2 is needed by GraphX.
    JavaRDD<Edge<RyaTypeWritable>> edgesOnly = keyedEdges.toJavaRDD().map(tuple -> tuple._2);
    RDD<Edge<RyaTypeWritable>> edges = JavaRDD.toRDD(edgesOnly);

    // No default vertex attribute is supplied (null), and both storage levels are MEMORY_ONLY.
    RyaTypeWritable defaultVertexAttr = null;
    StorageLevel firstLevel = StorageLevel.MEMORY_ONLY();
    StorageLevel secondLevel = StorageLevel.MEMORY_ONLY();

    return Graph.apply(vertices, edges, defaultVertexAttr, firstLevel, secondLevel, attributeTag, attributeTag);
}
 
Example 2
Source File: MLContextTest.java    From systemds with Apache License 2.0 6 votes vote down vote up
/**
 * Passes a Scala {@code RDD<String>} with IJV metadata to a DML script as matrix {@code M}
 * and expects the script to print the sum of its values.
 */
@Test
public void testRDDSumIJVDML() {
	System.out.println("MLContextTest - RDD<String> IJV sum DML");

	// Four entries whose last fields (1, 2, 3, 4) add up to the expected 10.0.
	List<String> ijvCells = new ArrayList<>();
	ijvCells.add("1 1 1");
	ijvCells.add("2 1 2");
	ijvCells.add("1 2 3");
	ijvCells.add("3 3 4");

	// Parallelize as a JavaRDD, then unwrap it to the underlying Scala RDD.
	RDD<String> cellsRdd = JavaRDD.toRDD(sc.parallelize(ijvCells));
	MatrixMetadata ijvMetadata = new MatrixMetadata(MatrixFormat.IJV, 3, 3);

	Script sumScript = dml("print('sum: ' + sum(M));").in("M", cellsRdd, ijvMetadata);
	setExpectedStdOut("sum: 10.0");
	ml.execute(sumScript);
}
 
Example 3
Source File: MLContextTest.java    From systemds with Apache License 2.0 6 votes vote down vote up
/**
 * Passes a Scala {@code RDD<String>} of comma-separated rows, together with explicit
 * {@code MatrixMetadata(3, 3, 9)}, to a DML script as matrix {@code M} and expects the
 * script to print the sum of its values.
 */
@Test
public void testRDDGoodMetadataDML() {
	System.out.println("MLContextTest - RDD<String> good metadata DML");

	// Three rows of three values each; 3 * (1 + 2 + 3) gives the expected 18.0.
	List<String> csvRows = new ArrayList<>();
	csvRows.add("1,1,1");
	csvRows.add("2,2,2");
	csvRows.add("3,3,3");

	// Parallelize as a JavaRDD, then unwrap it to the underlying Scala RDD.
	RDD<String> rowsRdd = JavaRDD.toRDD(sc.parallelize(csvRows));
	MatrixMetadata metadata = new MatrixMetadata(3, 3, 9);

	Script sumScript = dml("print('sum: ' + sum(M));").in("M", rowsRdd, metadata);
	setExpectedStdOut("sum: 18.0");
	ml.execute(sumScript);
}
 
Example 4
Source File: MLContextTest.java    From systemds with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that an IJV-described {@code RDD<String>} bound to {@code M} in a DML script
 * produces the expected sum on standard output.
 */
@Test
public void testRDDSumIJVDML() {
	System.out.println("MLContextTest - RDD<String> IJV sum DML");

	MatrixMetadata ijvMeta = new MatrixMetadata(MatrixFormat.IJV, 3, 3);

	// The trailing field of each entry (1, 2, 3, 4) totals 10.
	List<String> entries = new ArrayList<>();
	entries.add("1 1 1");
	entries.add("2 1 2");
	entries.add("1 2 3");
	entries.add("3 3 4");

	// Convert the Java-facing RDD wrapper into the Scala RDD the script input takes here.
	JavaRDD<String> javaEntries = sc.parallelize(entries);
	RDD<String> scalaEntries = JavaRDD.toRDD(javaEntries);

	Script script = dml("print('sum: ' + sum(M));").in("M", scalaEntries, ijvMeta);
	setExpectedStdOut("sum: 10.0");
	ml.execute(script);
}
 
Example 5
Source File: MLContextTest.java    From systemds with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a comma-separated {@code RDD<String>} supplied with
 * {@code MatrixMetadata(3, 3, 9)} and bound to {@code M} in a DML script produces the
 * expected sum on standard output.
 */
@Test
public void testRDDGoodMetadataDML() {
	System.out.println("MLContextTest - RDD<String> good metadata DML");

	MatrixMetadata meta = new MatrixMetadata(3, 3, 9);

	// Nine values in total: three 1s, three 2s and three 3s, summing to 18.
	List<String> lines = new ArrayList<>();
	lines.add("1,1,1");
	lines.add("2,2,2");
	lines.add("3,3,3");

	// Convert the Java-facing RDD wrapper into the Scala RDD the script input takes here.
	JavaRDD<String> javaLines = sc.parallelize(lines);
	RDD<String> scalaLines = JavaRDD.toRDD(javaLines);

	Script script = dml("print('sum: ' + sum(M));").in("M", scalaLines, meta);
	setExpectedStdOut("sum: 18.0");
	ml.execute(script);
}
 
Example 6
Source File: MLContextTest.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Passes a Scala {@code RDD<String>} of comma-separated rows to a DML script as matrix
 * {@code M}, without any metadata, and expects the script to print the sum of its values.
 */
@Test
public void testRDDSumCSVDML() {
	System.out.println("MLContextTest - RDD<String> CSV sum DML");

	// Three rows of three values each; 3 * (1 + 2 + 3) gives the expected 18.0.
	List<String> csvRows = new ArrayList<>();
	csvRows.add("1,1,1");
	csvRows.add("2,2,2");
	csvRows.add("3,3,3");

	// Parallelize as a JavaRDD, then unwrap it to the underlying Scala RDD.
	RDD<String> rowsRdd = JavaRDD.toRDD(sc.parallelize(csvRows));

	Script sumScript = dml("print('sum: ' + sum(M));").in("M", rowsRdd);
	setExpectedStdOut("sum: 18.0");
	ml.execute(sumScript);
}
 
Example 7
Source File: MLContextTest.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Supplies two named {@code RDD<String>} inputs to a DML script as a Scala {@code Seq} of
 * (name, rdd) tuples and expects the script to print the sum of each matrix.
 *
 * <p>Raw types are used on purpose for the tuples and the sequence, hence the suppression.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testInputTupleSeqNoMetadataDML() {
	System.out.println("MLContextTest - Tuple sequence no metadata DML");

	// First input: values 1..4, which sum to 10.
	List<String> firstRows = new ArrayList<>();
	firstRows.add("1,2");
	firstRows.add("3,4");
	RDD<String> firstRdd = JavaRDD.toRDD(sc.parallelize(firstRows));

	// Second input: values 5..8, which sum to 26.
	List<String> secondRows = new ArrayList<>();
	secondRows.add("5,6");
	secondRows.add("7,8");
	RDD<String> secondRdd = JavaRDD.toRDD(sc.parallelize(secondRows));

	// Pair each RDD with the variable name the script refers to it by.
	List namedInputs = new ArrayList();
	namedInputs.add(new Tuple2("m1", firstRdd));
	namedInputs.add(new Tuple2("m2", secondRdd));
	Seq inputSeq = JavaConversions.asScalaBuffer(namedInputs).toSeq();

	Script sumsScript = dml("print('sums: ' + sum(m1) + ' ' + sum(m2));").in(inputSeq);
	setExpectedStdOut("sums: 10.0 26.0");
	ml.execute(sumsScript);
}
 
Example 8
Source File: MLContextTest.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Supplies two named {@code RDD<String>} inputs, each with its own {@code MatrixMetadata},
 * to a DML script as a Scala {@code Seq} of (name, rdd, metadata) triples and expects the
 * script to print the sum of each matrix.
 *
 * <p>Raw types are used on purpose for the tuples and the sequence, hence the suppression.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testInputTupleSeqWithMetadataDML() {
	System.out.println("MLContextTest - Tuple sequence with metadata DML");

	// First input: values 1..4, which sum to 10.
	List<String> firstRows = new ArrayList<>();
	firstRows.add("1,2");
	firstRows.add("3,4");
	RDD<String> firstRdd = JavaRDD.toRDD(sc.parallelize(firstRows));

	// Second input: values 5..8, which sum to 26.
	List<String> secondRows = new ArrayList<>();
	secondRows.add("5,6");
	secondRows.add("7,8");
	RDD<String> secondRdd = JavaRDD.toRDD(sc.parallelize(secondRows));

	MatrixMetadata firstMeta = new MatrixMetadata(2, 2);
	MatrixMetadata secondMeta = new MatrixMetadata(2, 2);

	// Bundle each RDD with its script variable name and its metadata.
	List namedInputs = new ArrayList();
	namedInputs.add(new Tuple3("m1", firstRdd, firstMeta));
	namedInputs.add(new Tuple3("m2", secondRdd, secondMeta));
	Seq inputSeq = JavaConversions.asScalaBuffer(namedInputs).toSeq();

	Script sumsScript = dml("print('sums: ' + sum(m1) + ' ' + sum(m2));").in(inputSeq);
	setExpectedStdOut("sums: 10.0 26.0");
	ml.execute(sumsScript);
}
 
Example 9
Source File: MLContextTest.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a comma-separated {@code RDD<String>} bound to {@code M} in a DML script,
 * with no metadata supplied, produces the expected sum on standard output.
 */
@Test
public void testRDDSumCSVDML() {
	System.out.println("MLContextTest - RDD<String> CSV sum DML");

	// Nine values in total: three 1s, three 2s and three 3s, summing to 18.
	List<String> lines = new ArrayList<>();
	lines.add("1,1,1");
	lines.add("2,2,2");
	lines.add("3,3,3");

	// Convert the Java-facing RDD wrapper into the Scala RDD the script input takes here.
	JavaRDD<String> javaLines = sc.parallelize(lines);
	RDD<String> scalaLines = JavaRDD.toRDD(javaLines);

	Script script = dml("print('sum: ' + sum(M));").in("M", scalaLines);
	setExpectedStdOut("sum: 18.0");
	ml.execute(script);
}
 
Example 10
Source File: MLContextTest.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a Scala {@code Seq} of raw (name, rdd) tuples can be used as the inputs of
 * a DML script, which then prints the sum of each of the two matrices.
 *
 * <p>The tuples, list and sequence are intentionally raw, so the warnings are suppressed.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testInputTupleSeqNoMetadataDML() {
	System.out.println("MLContextTest - Tuple sequence no metadata DML");

	// Matrix m1: 1 + 2 + 3 + 4 = 10.
	List<String> m1Lines = new ArrayList<>();
	m1Lines.add("1,2");
	m1Lines.add("3,4");
	JavaRDD<String> m1JavaRdd = sc.parallelize(m1Lines);
	RDD<String> m1Rdd = JavaRDD.toRDD(m1JavaRdd);

	// Matrix m2: 5 + 6 + 7 + 8 = 26.
	List<String> m2Lines = new ArrayList<>();
	m2Lines.add("5,6");
	m2Lines.add("7,8");
	JavaRDD<String> m2JavaRdd = sc.parallelize(m2Lines);
	RDD<String> m2Rdd = JavaRDD.toRDD(m2JavaRdd);

	// Collect the (name, rdd) pairs and expose them as a Scala sequence.
	List inputs = new ArrayList();
	inputs.add(new Tuple2("m1", m1Rdd));
	inputs.add(new Tuple2("m2", m2Rdd));
	Seq scalaInputs = JavaConversions.asScalaBuffer(inputs).toSeq();

	Script script = dml("print('sums: ' + sum(m1) + ' ' + sum(m2));").in(scalaInputs);
	setExpectedStdOut("sums: 10.0 26.0");
	ml.execute(script);
}
 
Example 11
Source File: MLContextTest.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a Scala {@code Seq} of raw (name, rdd, metadata) triples can be used as the
 * inputs of a DML script, which then prints the sum of each of the two matrices.
 *
 * <p>The tuples, list and sequence are intentionally raw, so the warnings are suppressed.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testInputTupleSeqWithMetadataDML() {
	System.out.println("MLContextTest - Tuple sequence with metadata DML");

	// Matrix m1: 1 + 2 + 3 + 4 = 10.
	List<String> m1Lines = new ArrayList<>();
	m1Lines.add("1,2");
	m1Lines.add("3,4");
	JavaRDD<String> m1JavaRdd = sc.parallelize(m1Lines);
	RDD<String> m1Rdd = JavaRDD.toRDD(m1JavaRdd);

	// Matrix m2: 5 + 6 + 7 + 8 = 26.
	List<String> m2Lines = new ArrayList<>();
	m2Lines.add("5,6");
	m2Lines.add("7,8");
	JavaRDD<String> m2JavaRdd = sc.parallelize(m2Lines);
	RDD<String> m2Rdd = JavaRDD.toRDD(m2JavaRdd);

	MatrixMetadata m1Meta = new MatrixMetadata(2, 2);
	MatrixMetadata m2Meta = new MatrixMetadata(2, 2);

	// Collect the (name, rdd, metadata) triples and expose them as a Scala sequence.
	List inputs = new ArrayList();
	inputs.add(new Tuple3("m1", m1Rdd, m1Meta));
	inputs.add(new Tuple3("m2", m2Rdd, m2Meta));
	Seq scalaInputs = JavaConversions.asScalaBuffer(inputs).toSeq();

	Script script = dml("print('sums: ' + sum(m1) + ' ' + sum(m2));").in(scalaInputs);
	setExpectedStdOut("sums: 10.0 26.0");
	ml.execute(script);
}
 
Example 12
Source File: CollabFilterCassandra7.java    From Spark-Cassandra-Collabfiltering with Apache License 2.0 5 votes vote down vote up
/**
 * Predicts ratings for the rows of a Cassandra-backed validation RDD.
 *
 * <p>Each row is reduced to a pair of the integers stored under {@code RatingDO.USER_COL} and
 * {@code RatingDO.PRODUCT_COL}; the pairs are converted to a Scala RDD and handed to
 * {@code model.predict}.
 *
 * @param model              factorization model used to produce the predictions
 * @param validationsCassRdd Cassandra rows to predict ratings for
 * @return the model's predictions as a {@code JavaRDD<Rating>}
 */
public JavaRDD<Rating> predict(MatrixFactorizationModel model, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
	// Anonymous class rather than a lambda: this is the pre-lambda form of the mapping.
	org.apache.spark.api.java.function.Function<CassandraRow, Tuple2<Object, Object>> toUserProductPair =
			new org.apache.spark.api.java.function.Function<CassandraRow, Tuple2<Object, Object>>() {
				@Override
				public Tuple2<Object, Object> call(CassandraRow validationRow) throws Exception {
					return new Tuple2<Object, Object>(validationRow.getInt(RatingDO.USER_COL), validationRow.getInt(RatingDO.PRODUCT_COL));
				}
			};

	// model.predict takes a Scala RDD, so unwrap the mapped JavaRDD before calling it.
	RDD<Tuple2<Object, Object>> userProductPairs = JavaRDD.toRDD(validationsCassRdd.map(toUserProductPair));
	return model.predict(userProductPairs).toJavaRDD();
}
 
Example 13
Source File: JavaSVMWithSGDExample.java    From SparkDemo with MIT License 4 votes vote down vote up
/**
 * Trains a linear SVM with SGD on a LIBSVM sample file, scores a held-out subset, prints the
 * area under the ROC curve, then saves the model and loads it back.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
  SparkContext sc = new SparkContext(new SparkConf().setAppName("JavaSVMWithSGDExample"));
  // $example on$
  String path = "data/mllib/sample_libsvm_data.txt";
  JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(sc, path).toJavaRDD();

  // Sample 60% (without replacement, fixed seed) for training; the remainder is the test set.
  JavaRDD<LabeledPoint> training = data.sample(false, 0.6, 11L);
  training.cache();
  JavaRDD<LabeledPoint> test = data.subtract(training);

  // Fit the model with 100 SGD iterations.
  int numIterations = 100;
  final SVMModel model = SVMWithSGD.train(training.rdd(), numIterations);

  // Without a threshold, predict() yields raw scores rather than 0/1 labels.
  model.clearThreshold();

  // Pair each test point's raw score with its true label.
  Function<LabeledPoint, Tuple2<Object, Object>> scoreWithLabel =
    new Function<LabeledPoint, Tuple2<Object, Object>>() {
      @Override
      public Tuple2<Object, Object> call(LabeledPoint p) {
        Double score = model.predict(p.features());
        return new Tuple2<Object, Object>(score, p.label());
      }
    };
  JavaRDD<Tuple2<Object, Object>> scoreAndLabels = test.map(scoreWithLabel);

  // The metrics class takes a Scala RDD, so unwrap the JavaRDD first.
  BinaryClassificationMetrics metrics =
    new BinaryClassificationMetrics(JavaRDD.toRDD(scoreAndLabels));
  double auROC = metrics.areaUnderROC();

  System.out.println("Area under ROC = " + auROC);

  // Persist the model, then read it back from the same location.
  String modelPath = "target/tmp/javaSVMWithSGDModel";
  model.save(sc, modelPath);
  SVMModel sameModel = SVMModel.load(sc, modelPath);
  // $example off$

  sc.stop();
}
 
Example 14
Source File: CollabFilterCassandra8.java    From Spark-Cassandra-Collabfiltering with Apache License 2.0 4 votes vote down vote up
/**
 * Predicts ratings for the rows of a Cassandra-backed validation RDD.
 *
 * <p>Each row is reduced to a pair of the integers stored under {@code RatingDO.USER_COL} and
 * {@code RatingDO.PRODUCT_COL}; the pairs are converted to a Scala RDD and handed to
 * {@code model.predict}.
 *
 * @param model              factorization model used to produce the predictions
 * @param validationsCassRdd Cassandra rows to predict ratings for
 * @return the model's predictions as a {@code JavaRDD<Rating>}
 */
public JavaRDD<Rating> predict(MatrixFactorizationModel model, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
	// Keep only the user and product columns of every validation row.
	JavaRDD<Tuple2<Object, Object>> userProductPairs = validationsCassRdd.map(
			validationRow -> new Tuple2<Object, Object>(
					validationRow.getInt(RatingDO.USER_COL),
					validationRow.getInt(RatingDO.PRODUCT_COL)));

	// model.predict takes a Scala RDD, so unwrap the JavaRDD before calling it.
	RDD<Tuple2<Object, Object>> validationsRdd = JavaRDD.toRDD(userProductPairs);
	return model.predict(validationsRdd).toJavaRDD();
}