Java Code Examples for org.apache.spark.api.java.JavaSparkContext.parallelize()

The following Java code examples show how to use the parallelize() method of the org.apache.spark.api.java.JavaSparkContext class.
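Before turning to the project examples, here is a minimal, self-contained sketch of the basic pattern (the class name and sample data are hypothetical, not drawn from any project below): parallelize() distributes a local java.util.List as a JavaRDD, optionally with an explicit number of partitions.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class ParallelizeSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("ParallelizeSketch").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Turn a local collection into a distributed RDD; the optional second
        // argument sets the number of partitions (slices).
        List<Integer> data = Arrays.asList(1, 2, 3, 4, 5);
        JavaRDD<Integer> rdd = sc.parallelize(data, 2);

        int sum = rdd.reduce(Integer::sum);
        System.out.println("sum = " + sum);

        sc.close();
    }
}
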
Example 1
Project: ViraPipe   File: InterleaveMulti.java
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {

    List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
    List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);

    JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
    JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
    JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);

    zips.foreach( splits ->  {
      Path path = splits._1.getPath();
      FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), splits._1);
      FastqRecordReader fqreader2 = new FastqRecordReader(new Configuration(), splits._2);
      writeInterleavedSplits(fqreader, fqreader2, new Configuration(), splitDir+"/"+path.getParent().getName()+"_"+splits._1.getStart()+".fq");
    });
  }
 
Example 2
Project: ViraPipe   File: InterleaveMulti.java
private static void splitFastq(FileStatus fst, String fqPath, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
  Path fqpath = new Path(fqPath);
  String fqname = fqpath.getName();
  String[] ns = fqname.split("\\.");
  //TODO: Handle also compressed files
  List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);

  JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);

  splitRDD.foreach( split ->  {

    FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), split);
    writeFastqFile(fqreader, new Configuration(), splitDir + "/split_" + split.getStart() + "." + ns[1]);

   });
}
 
Example 3
Project: ViraPipe   File: Decompress.java
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {

    List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
    List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);

    JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
    JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
    JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);

    zips.foreach( splits ->  {
      Path path = splits._1.getPath();
      FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), splits._1);
      FastqRecordReader fqreader2 = new FastqRecordReader(new Configuration(), splits._2);

      writeInterleavedSplits(fqreader, fqreader2, new Configuration(), splitDir+"/"+path.getParent().getName()+"_"+splits._1.getStart()+".fq");
    });
  }
 
Example 4
Project: ViraPipe   File: DecompressInterleave.java
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {

    String[] ns = fst.getPath().getName().split("\\.");
    //TODO: Handle also compressed files
    List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
    List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);

    JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
    JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
    JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);

    zips.foreach( splits ->  {
      Path path = splits._1.getPath();
      FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), splits._1);
      FastqRecordReader fqreader2 = new FastqRecordReader(new Configuration(), splits._2);
      writeInterleavedSplits(fqreader, fqreader2, new Configuration(), splitDir, path.getParent().getName()+"_"+splits._1.getStart()+".fq");
    });
  }
 
Example 5
Project: tutorial-sparkpi-java-spring   File: SparkPiProducer.java
public String GetPi(int scale) {
    JavaSparkContext jsc = SparkContextProvider.getContext();

    int n = 100000 * scale;
    List<Integer> l = new ArrayList<Integer>(n);
    for (int i = 0; i < n; i++) {
        l.add(i);
    }

    JavaRDD<Integer> dataSet = jsc.parallelize(l, scale);

    int count = dataSet.map(integer -> {
        double x = Math.random() * 2 - 1;
        double y = Math.random() * 2 - 1;
        return (x * x + y * y < 1) ? 1 : 0;
    }).reduce((integer, integer2) -> integer + integer2);

    String ret = "Pi is rouuuughly " + 4.0 * count / n;

    return ret;
}
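Note that this example uses the two-argument overload of parallelize(): the second argument (scale) is the number of partitions (slices) the list is split into, which in turn determines how many parallel tasks run the map/reduce above.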
 
Example 6
Project: ViraPipe   File: Decompress.java
private static void splitFastq(FileStatus fst, String fqPath, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
  Path fqpath = new Path(fqPath);
  String fqname = fqpath.getName();
  String[] ns = fqname.split("\\.");
  List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);

  JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);

  splitRDD.foreach( split ->  {

    FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), split);
    writeFastqFile(fqreader, new Configuration(), splitDir + "/split_" + split.getStart() + "." + ns[1]);

   });
}
 
Example 7
Project: ViraPipe   File: Interleave.java
private static void splitFastq(FileStatus fst, String fqPath, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
  Path fqpath = new Path(fqPath);
  String fqname = fqpath.getName();
  String[] ns = fqname.split("\\.");
  List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);

  JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);

  splitRDD.foreach( split ->  {

    FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), split);
    writeFastqFile(fqreader, new Configuration(), splitDir + "/split_" + split.getStart() + "." + ns[1]);

   });
}
 
Example 8
Project: incubator-sdap-mudrod   File: MetadataExtractor.java
/**
 * buildMetadataRDD: Convert metadata list to JavaPairRDD
 *
 * @param es        an Elasticsearch client node instance
 * @param sc        spark context
 * @param index     index name of log processing application
 * @param metadatas metadata list
 * @return PairRDD, in each pair key is metadata short name and value is term
 * list extracted from metadata variables.
 */
protected JavaPairRDD<String, List<String>> buildMetadataRDD(ESDriver es, JavaSparkContext sc, String index, List<PODAACMetadata> metadatas) {
  JavaRDD<PODAACMetadata> metadataRDD = sc.parallelize(metadatas);
  JavaPairRDD<String, List<String>> metadataTermsRDD = metadataRDD.mapToPair(new PairFunction<PODAACMetadata, String, List<String>>() {
    /**
     *
     */
    private static final long serialVersionUID = 1L;

    @Override
    public Tuple2<String, List<String>> call(PODAACMetadata metadata) throws Exception {
      return new Tuple2<String, List<String>>(metadata.getShortName(), metadata.getAllTermList());
    }
  }).reduceByKey(new Function2<List<String>, List<String>, List<String>>() {
    /**
     *
     */
    private static final long serialVersionUID = 1L;

    @Override
    public List<String> call(List<String> v1, List<String> v2) throws Exception {
      List<String> list = new ArrayList<String>();
      list.addAll(v1);
      list.addAll(v2);
      return list;
    }
  });

  return metadataTermsRDD;
}
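For comparison, the same pairing-and-merging logic can be expressed more compactly with Java 8 lambdas in place of the anonymous PairFunction and Function2 classes; this is only an illustrative sketch reusing the types from the example above, not code from the MetadataExtractor class:

protected JavaPairRDD<String, List<String>> buildMetadataRDD(ESDriver es, JavaSparkContext sc, String index, List<PODAACMetadata> metadatas) {
  return sc.parallelize(metadatas)
      // key each metadata record by its short name, value is its extracted term list
      .mapToPair(m -> new Tuple2<>(m.getShortName(), m.getAllTermList()))
      // concatenate the term lists of records that share a short name
      .reduceByKey((v1, v2) -> {
        List<String> merged = new ArrayList<>(v1);
        merged.addAll(v2);
        return merged;
      });
}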
 
Example 9
Project: s3-inventory-usage-examples   File: ReducedRedundancyLocatorExampleMain.java
public static void main(String[] args) throws Exception{
    String srcBucketName;
    String scrBucketKey;
    String destBucketName;
    String destPrefix;
    ArgumentParser argumentParser = new ArgumentParser();
    AmazonS3 s3Client = new AmazonS3Client();

    try {
        BucketKey location = argumentParser.parseArguments(args);
        srcBucketName = location.getSrcBucket();
        scrBucketKey = location.getSrcKey();
        destBucketName = location.getDestBucket();
        destPrefix = location.getDestPrefix();
    } catch (ParseException e) {
        LOG.info(PARSE_ERROR_MSG);
        throw new IllegalArgumentException("Parser throw a parse Exception", e);
    }

    // Obtain the original manifest files
    InventoryManifestRetriever inventoryManifestRetriever =
            new InventoryManifestRetriever(s3Client, srcBucketName, scrBucketKey);
    InventoryManifest manifest = inventoryManifestRetriever.getInventoryManifest();

    // Check if the inventory report includes the StorageClass column
    String fileSchema = manifest.getFileSchema();
    String filterColumn = "storageClass";
    if (!StringUtils.containsIgnoreCase(fileSchema, filterColumn)) {
        throw new StorageClassNotIncludedException();
    }

    //Create Spark Context
    SparkConf sparkConf = new SparkConf();
    JavaSparkContext sc = new JavaSparkContext(sparkConf);
    Broadcast<CachedS3ClientFactory> clientFactory = sc.broadcast(new CachedS3ClientFactory());

    // Get the inventory report, split it into lines, parse each line to a POJO,
    // Filter, and write new csv file to S3
    JavaRDD<InventoryManifest.Locator> locatorRDD = sc.parallelize(manifest.getLocators());
    List<InventoryManifest.Locator> newLocatorList = locatorRDD
            .map(new InventoryReportLineRetriever(clientFactory, manifest))
            .flatMap(new InventoryReportMapper(manifest))
            .filter(new ReducedRedundancyStorageClassFilter())
            .mapPartitions(new WriteNewInventoryReportFunc(clientFactory, srcBucketName, manifest,
                    destBucketName, destPrefix))
            .collect();

    // Generate new manifest files including new locators, and send them back to S3
    new ManifestWriter(s3Client, destBucketName, destPrefix, srcBucketName, manifest)
            .writeManifest(newLocatorList);

    sc.close();
}
 
Example 10
Project: neural-algorithms   File: ExampleSpark.java
public static void main(String[] args) {

        Logger.getLogger("org").setLevel(Level.WARN);

        SparkConf sparkConf = new SparkConf()
                .setAppName("ExampleSpark")
                .setMaster("local");
        JavaSparkContext jsc = new JavaSparkContext(sparkConf);

        //String in = "data/iris2.data";
        //String out = "data/iris2outSVM.data";

        //double[][] inputs = IOUtils.readMatrix(in, ",");
        //double[] outputs = IOUtils.readVector(out);

        IdxManager idx = IOUtils.deserialize("data/idx.ser");
        IdxManager idxTest = IOUtils.deserialize("data/idx-test.ser");
        double[][] inputs = idx.getData();
        double[] outputs = idx.getLabelsVec();
        double[][] inputsTest = idxTest.getData();
        double[] outputsTest = idxTest.getLabelsVec();
        inputs = HogManager.exportDataFeatures(inputs, idx.getNumOfRows(),
               idx.getNumOfCols());
        inputsTest = HogManager.exportDataFeatures(inputsTest, idx.getNumOfRows(),
               idx.getNumOfCols());

        List<LabeledPoint> pointList = new ArrayList<>();
        for (int i = 0; i < outputs.length; i++) {
            pointList.add(new LabeledPoint(outputs[i], Vectors.dense(inputs[i])));
        }

        List<LabeledPoint> pointListTest = new ArrayList<>();
        for (int i = 0; i < outputsTest.length; i++) {
            pointListTest.add(new LabeledPoint(outputsTest[i],
                    Vectors.dense(inputsTest[i])));
        }

        JavaRDD<LabeledPoint> trainingData = jsc.parallelize(pointList);
        JavaRDD<LabeledPoint> testData = jsc.parallelize(pointListTest);

        // Split the data into training and test sets (30% held out for testing)
        //JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[]{0.7, 0.3});
        //JavaRDD<LabeledPoint> trainingData = splits[0];
        //JavaRDD<LabeledPoint> testData = splits[1];

        // Set parameters.
        // Empty categoricalFeaturesInfo indicates all features are continuous.
        Integer numClasses = 10;
        Map<Integer, Integer> categoricalFeaturesInfo = new HashMap<>();
        String impurity = "gini";
        Integer maxDepth = 10;
        Integer maxBins = 256;

        // Train a DecisionTree model for classification.
        long startTime = System.currentTimeMillis();
        final DecisionTreeModel model = DecisionTree.trainClassifier(trainingData,
                numClasses, categoricalFeaturesInfo, impurity, maxDepth, maxBins);
        long endTime = System.currentTimeMillis();
        long learnTime = endTime - startTime;

        // Evaluate model on test instances and compute test error
        JavaPairRDD<Double, Double> predictionAndLabel =
                testData.mapToPair(
                        p -> new Tuple2<>(model.predict(p.features()), p.label()));
        Double testErr = 1.0 * predictionAndLabel.filter(
                pl -> !pl._1().equals(pl._2())).count() / testData.count();

        // results
        new File("results").mkdir();
        IOUtils.writeStr("results/dtree_error.data", Double.toString(testErr));
        IOUtils.writeStr("results/dtree_model.data", model.toDebugString());

        double[][] outFinal = new double[outputsTest.length][];
        for (int i = 0; i < outputsTest.length; i++) {
            outFinal[i] = valToVec(model.predict(Vectors.dense(inputsTest[i])));
        }

        ConfusionMatrix cm = new ConfusionMatrix(outFinal, idxTest.getLabels());
        cm.writeClassErrorMatrix("results/confusion_matrix.data");
        IOUtils.writeStr("results/learn_time_ms.data", Long.toString(learnTime));
    }
 
Example 11
Project: ViraPipe   File: HDFSWriter.java
private static JavaPairRDD<Text, SequencedFragment> interleaveReads(String fastq, String fastq2, int splitlen, JavaSparkContext sc) throws IOException {

        FileSystem fs = FileSystem.get(new Configuration());

        FileStatus fst = fs.getFileStatus(new Path(fastq));
        FileStatus fst2 = fs.getFileStatus(new Path(fastq2));

        List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
        List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);

        JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
        JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
        JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);

        return zips.flatMapToPair( splits ->  {

            FastqInputFormat.FastqRecordReader fqreader = new FastqInputFormat.FastqRecordReader(new Configuration(), splits._1);
            FastqInputFormat.FastqRecordReader fqreader2 = new FastqInputFormat.FastqRecordReader(new Configuration(), splits._2);

            ArrayList<Tuple2<Text, SequencedFragment>> reads = new ArrayList<Tuple2<Text, SequencedFragment>>();
            while (fqreader.nextKeyValue()) {
                String key = fqreader.getCurrentKey().toString();
                String[] keysplit = key.split(" ");
                key = keysplit[0];

                SequencedFragment sf = new SequencedFragment();
                sf.setQuality(new Text(fqreader.getCurrentValue().getQuality().toString()));
                sf.setSequence(new Text(fqreader.getCurrentValue().getSequence().toString()));

                if (fqreader2.nextKeyValue()) {

                    String key2 = fqreader2.getCurrentKey().toString();
                    String[] keysplit2 = key2.split(" ");
                    key2 = keysplit2[0];
                    //key2 = key2.replace(" 2:N:0:1","/2");

                    SequencedFragment sf2 = new SequencedFragment();
                    sf2.setQuality(new Text(fqreader2.getCurrentValue().getQuality().toString()));
                    sf2.setSequence(new Text(fqreader2.getCurrentValue().getSequence().toString()));
                    reads.add(new Tuple2<Text, SequencedFragment>(new Text(key), sf));
                    reads.add(new Tuple2<Text, SequencedFragment>(new Text(key2), sf2));
                }
            }

            return reads.iterator();

        });
    }
 
Example 12
Project: Apache-Spark-2x-for-Java-Developers   File: PropertyGraphExampleFromEdges.java
public static void main(String[] args) {
		System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
		SparkConf conf = new SparkConf().setMaster("local").setAppName("graph");
		JavaSparkContext javaSparkContext = new JavaSparkContext(conf);
		ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);


		List<Edge<String>> edges = new ArrayList<>();

		edges.add(new Edge<String>(1, 2, "Friend"));
		edges.add(new Edge<String>(2, 3, "Advisor"));
		edges.add(new Edge<String>(1, 3, "Friend"));
		edges.add(new Edge<String>(4, 3, "colleague"));
		edges.add(new Edge<String>(4, 5, "Relative"));
		edges.add(new Edge<String>(2, 5, "BusinessPartners"));


		JavaRDD<Edge<String>> edgeRDD = javaSparkContext.parallelize(edges);
		
		
		Graph<String, String> graph = Graph.fromEdges(edgeRDD.rdd(), "",StorageLevel.MEMORY_ONLY(), StorageLevel.MEMORY_ONLY(), stringTag, stringTag);
		
		
		graph.vertices().toJavaRDD().collect().forEach(System.out::println);
		
		
		
//	graph.aggregateMessages(sendMsg, mergeMsg, tripletFields, evidence$11)	
		
	}
 
Example 13
Project: spark-traffic   File: Application.java
public static void main(String[] args) {
        boolean isLocal = false;

        final String master = isLocal ? "local[4]" : "spark://10.128.184.199:7077";
        final String csv = isLocal ? "Z:/RCS_SP1/RAW_DATA_MORE/2016_03/TAXI/TAXI_20160301.csv" : "/pi_nj_57/RCS_SP1/RAW_DATA_MORE/2016_03/TAXI/TAXI_20160301.csv";
        final String appName = "SpeedCalculator";

        Calculator calculator = new Calculator();

        SparkConf conf = new SparkConf()
                .set("spark.executor.memory", "4G")
                .set("spark.submit.deployMode", "cluster")
                .setMaster("spark://10.128.184.199:7077")
                .setJars(new String[]{"C:\\Users\\i321761\\Desktop\\git\\github.wdf.sap.corp\\i321761\\hadoop-sample\\target\\hadoopsample-1.0-SNAPSHOT.jar"});

        JavaSparkContext sc = new JavaSparkContext(master, appName, conf);
//        JavaRDD<String> rdd = sc.textFile(csv, 2);
        JavaRDD<String> rdd = sc.parallelize(Arrays.asList("abc", "def"));
        long start = System.currentTimeMillis();
        System.out.println("Count Start ....");

        // Convert csv string to taxi point structure and remove invalid records
        JavaRDD<ITaxiMonitor.TaxiPoint> taxiPointRDD = rdd.map(line -> TaxiPointUtil.parseTaxiPoint(line))
                .filter(point -> point != null && !point.receiveTime.isEmpty() && point.receiveTime.contains(" 08:"));

        JavaPairRDD<Long, List<ITaxiMonitor.TaxiPoint>> slotsIn5 = taxiPointRDD
                .keyBy(point -> (DateTimeUtil.parseToMillSecond(point.receiveTime, "UTC+8") / 300000) * 300000)
                .combineByKey(
                        // createCombiner: initialize the per-key list when the first record for a key arrives
                        v -> {
                            List<ITaxiMonitor.TaxiPoint> points = new ArrayList();
                            points.add(v);
                            return points;
                        },
            
                        // mergeValue: append a newly received record to the key's existing list
                        (c, v) -> {
                            c.add(v);
                            return c;
                        },
            
                        // mergeCombiners: a key's records may be spread across multiple tasks, so merge the partial lists
                        (c1, c2) -> {
                            c1.addAll(c2);
                            return c1;
                        }
                )
                .sortByKey();
        // Each key is a five-minute bucket of traffic data; invoke the calculator on each bucket to compute traffic speeds
        slotsIn5.map(slot -> calculator.execute(slot._2(), slot._1(), slot._1()))
                .collect().forEach(speedResult -> {
                    speedResult.getTimedEdgeSpeeds().forEach(timedEdgeSpeeds -> {
                        long t = DateTimeUtil.parseToMillSecond(timedEdgeSpeeds.timestamp, "UTC+0");
                        timedEdgeSpeeds.edgeSpeeds.forEach(speed -> System.out.println(" * EDGE_SPEED: " + TaxiPointUtil.formatEdgeSpeed(t, speed, ",")));
                    });
                });

        slotsIn5.take(10)
                .forEach(slot -> System.out.println("slot: " + slot._1() + ", " + DateTimeUtil.formatToUTC(slot._1()) + ", count: " + slot._2().size()));
//                .foreach(slot -> System.out.println("slot: " + DateTimeUtil.formatToUTC(slot._1()) + ", count" + slot._2().size()));

        sc.stop();
    }
 
Example 14
Project: ViraPipe   File: DecompressInterleave.java
private static void splitFastq(FileStatus fst, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {

    //TODO: Handle also compressed files
    List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, new Configuration(), splitlen);

    JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);

    splitRDD.foreach( split ->  {

      FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), split);
      writeFastqFile(fqreader, new Configuration(), splitDir + "/" + split.getPath().getName()+"_"+split.getStart() + ".fq");

     });
  }
 
Example 15
Project: ViraPipe   File: Interleave.java
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {

    List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
    List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);

    JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
    JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
    JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);

    zips.foreach( splits ->  {
      Path path = splits._1.getPath();
      FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), splits._1);
      FastqRecordReader fqreader2 = new FastqRecordReader(new Configuration(), splits._2);
      writeInterleavedSplits(fqreader, fqreader2, new Configuration(), splitDir+"/"+path.getParent().getName()+"_"+splits._1.getStart()+".fq");
    });
  }
 
Example 16
Project: bunsen   File: FhirEncodersTest.java
@Test
public void testFromRdd() {

  JavaSparkContext context = new JavaSparkContext(spark.sparkContext());

  JavaRDD<Condition> conditionRdd = context.parallelize(ImmutableList.of(condition));

  Dataset<Condition> ds = spark.createDataset(conditionRdd.rdd(),
      encoders.of(Condition.class));

  Condition convertedCondition = ds.head();

  Assert.assertEquals(condition.getId(),
      convertedCondition.getId());
}
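parallelize() is also convenient in unit tests like this one: a small in-memory list is turned into a JavaRDD, and rdd() then unwraps it to the underlying Scala RDD expected by spark.createDataset().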