org.apache.spark.SparkContext Java Examples

The following examples show how to use org.apache.spark.SparkContext. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
Example #1
Source File: JobUtil.java    From spark-llap with Apache License 2.0
public static JobConf createJobConf(Map<String, String> options, String queryString) {
  JobConf jobConf = new JobConf(SparkContext.getOrCreate().hadoopConfiguration());
  jobConf.set("hive.llap.zk.registry.user", "hive");
  jobConf.set("llap.if.hs2.connection", HWConf.RESOLVED_HS2_URL.getFromOptionsMap(options));
  if (queryString != null) {
    jobConf.set("llap.if.query", queryString);
  }
  jobConf.set("llap.if.user", HWConf.USER.getFromOptionsMap(options));
  jobConf.set("llap.if.pwd", HWConf.PASSWORD.getFromOptionsMap(options));
  if (options.containsKey("default.db")) {
    jobConf.set("llap.if.database", HWConf.DEFAULT_DB.getFromOptionsMap(options));
  }
  if (!options.containsKey("handleid")) {
    String handleId = UUID.randomUUID().toString();
    options.put("handleid", handleId);
  }
  jobConf.set("llap.if.handleid", options.get("handleid"));
  return jobConf;
}
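
SparkContext.getOrCreate() here simply reuses whatever context the surrounding Spark application has already started and wraps its Hadoop configuration in a JobConf. A minimal, self-contained sketch of that pattern (the class name and configuration key are illustrative, not part of spark-llap):

import org.apache.hadoop.mapred.JobConf;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;

public class GetOrCreateSketch {
  public static void main(String[] args) {
    // getOrCreate() reuses the active SparkContext if one exists,
    // otherwise it creates one from the supplied SparkConf.
    SparkConf conf = new SparkConf().setAppName("getOrCreate-sketch").setMaster("local[*]");
    SparkContext sc = SparkContext.getOrCreate(conf);

    // Wrap the context's Hadoop configuration, as createJobConf() does,
    // and layer job-specific settings on top.
    JobConf jobConf = new JobConf(sc.hadoopConfiguration());
    jobConf.set("example.key", "example.value");

    sc.stop();
  }
}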
 
Example #2
Source File: BigQuerySparkSQL.java    From spark-on-k8s-gcp-examples with Apache License 2.0
private static BigQuerySQLContext createBigQuerySQLContext(String[] args) {
  String projectId = args[0];
  Preconditions.checkArgument(!Strings.isNullOrEmpty(projectId),
      "GCP project ID must not be empty");
  String gcsBucket = args[1];
  Preconditions.checkArgument(!Strings.isNullOrEmpty(gcsBucket),
      "GCS bucket must not be empty");

  String serviceAccountJsonKeyFilePath = System.getenv(APPLICATION_CREDENTIALS_ENV);
  Preconditions.checkArgument(!Strings.isNullOrEmpty(serviceAccountJsonKeyFilePath),
      APPLICATION_CREDENTIALS_ENV + " must be set");

  SQLContext sqlContext = SQLContext.getOrCreate(SparkContext.getOrCreate());
  BigQuerySQLContext bigQuerySQLContext = new BigQuerySQLContext(sqlContext);
  bigQuerySQLContext.setBigQueryProjectId(projectId);
  bigQuerySQLContext.setBigQueryGcsBucket(gcsBucket);
  bigQuerySQLContext.setGcpJsonKeyFile(serviceAccountJsonKeyFilePath);

  return bigQuerySQLContext;
}
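
The SparkContext wiring in this example is plain Spark: SQLContext.getOrCreate(SparkContext.getOrCreate()) resolves to the SQLContext of the already running application. A sketch of just that part, without the BigQuery wrapper (class name illustrative; on Spark 2.x SQLContext.getOrCreate is deprecated in favor of SparkSession):

import org.apache.spark.SparkContext;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;

public class SqlContextSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("sqlcontext-sketch")
        .master("local[*]")
        .getOrCreate();

    // Same lookup the example performs; it returns the SQLContext backed
    // by the session started above.
    SQLContext sqlContext = SQLContext.getOrCreate(SparkContext.getOrCreate());
    sqlContext.sql("SELECT 1").show();

    spark.stop();
  }
}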
 
Example #3
Source File: DataStreamLoaderExample.java    From toolbox with Apache License 2.0
public static void main(String[] args) throws Exception {
    SparkConf conf = new SparkConf().setAppName("SLink!").setMaster("local");
    SparkContext sc = new SparkContext(conf);
    SQLContext sqlContext = new SQLContext(sc);

    // Path to the dataset
    String path = "datasets/simulated/WI_samples.json";

    // Create an AMIDST object for managing the data
    DataSpark dataSpark = DataSparkLoader.open(sqlContext, path);

    // Print all the instances in the dataset
    dataSpark.collectDataStream()
            .forEach(dataInstance -> System.out.println(dataInstance));
}
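
For comparison, loading the same JSON file with stock Spark APIs, without the AMIDST DataSparkLoader wrapper, might look like the following sketch (assumes Spark 2.x):

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;

public class JsonLoadSketch {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("json-load-sketch").setMaster("local");
    SparkContext sc = new SparkContext(conf);
    SQLContext sqlContext = new SQLContext(sc);

    // Read the JSON dataset directly as a DataFrame and dump it.
    Dataset<Row> df = sqlContext.read().json("datasets/simulated/WI_samples.json");
    df.show();

    sc.stop();
  }
}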
 
Example #4
Source File: SparkMaster.java    From GeoTriples with Apache License 2.0
/**
 * Convert the input Dataset into RDF triples and store the results.
 * The conversion is taking place per Partitions using the mapPartition Spark transformation.
 * @param mapping_list list of TripleMaps
 */
private void convert_partition(ArrayList<TriplesMap> mapping_list){
    SparkContext sc = SparkContext.getOrCreate();

    Pair<ArrayList<TriplesMap>, List<String>> transformation_info = new Pair<>(mapping_list, Arrays.asList(reader.getHeaders()));
    ClassTag<Pair<ArrayList<TriplesMap>, List<String>>> classTag_pair = scala.reflect.ClassTag$.MODULE$.apply(Pair.class);
    Broadcast<Pair<ArrayList<TriplesMap>, List<String>>> bd_info = sc.broadcast(transformation_info, classTag_pair);

    rowRDD
        .mapPartitions(
        (Iterator<Row> rows_iter) -> {
            ArrayList<TriplesMap> p_mapping_list = bd_info.value().getKey();
            List<String> p_header = bd_info.value().getValue();
            RML_Converter rml_converter = new RML_Converter(p_mapping_list, p_header);
            rml_converter.start();
            rml_converter.registerFunctions();
            Iterator<String> triples = rml_converter.convertPartition(rows_iter);

            rml_converter.stop();
            return triples;
        })
        .saveAsTextFile(outputDir);
}
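
Broadcasting through the Scala SparkContext from Java needs an explicit ClassTag, which is why the example builds one with ClassTag$.MODULE$.apply before calling sc.broadcast. A stripped-down sketch of that mechanic (the broadcast value is illustrative):

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.broadcast.Broadcast;

import scala.reflect.ClassTag;

public class BroadcastSketch {
  public static void main(String[] args) {
    SparkContext sc = SparkContext.getOrCreate(
        new SparkConf().setAppName("broadcast-sketch").setMaster("local[*]"));

    // Unlike JavaSparkContext.broadcast(value), the Scala SparkContext
    // requires the ClassTag to be passed explicitly from Java.
    List<String> headers = Arrays.asList("id", "name", "geometry");
    ClassTag<List<String>> tag = scala.reflect.ClassTag$.MODULE$.apply(List.class);
    Broadcast<List<String>> bdHeaders = sc.broadcast(headers, tag);

    System.out.println(bdHeaders.value());
    sc.stop();
  }
}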
 
Example #5
Source File: LensAPI.java    From cognition with Apache License 2.0
/**
 * Helper method for creating the spark context from the given cognition configuration
 * @return a new configured spark context
 */
public SparkContext createSparkContext() {
  SparkConf conf = new SparkConf();

  Configuration config = cognition.getProperties();

  conf.set("spark.serializer", KryoSerializer.class.getName());
  conf.setAppName(config.getString("app.name"));
  conf.setMaster(config.getString("master"));

  Iterator<String> iterator = config.getKeys("spark");
  while (iterator.hasNext()) {
    String key = iterator.next();
    conf.set(key, config.getString(key));
  }

  SparkContext sc = new SparkContext(conf);
  for (String jar : config.getStringArray("jars")) {
    sc.addJar(jar);
  }

  return sc;
}
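
The same copy-prefixed-keys pattern works against an ordinary java.util.Properties instead of the cognition Configuration; a sketch with placeholder keys and values:

import java.util.Properties;

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.serializer.KryoSerializer;

public class ConfiguredContextSketch {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("spark.executor.memory", "1g");
    props.setProperty("spark.ui.enabled", "false");

    SparkConf conf = new SparkConf()
        .setAppName("configured-context-sketch")
        .setMaster("local[*]")
        .set("spark.serializer", KryoSerializer.class.getName());

    // Copy every spark.* property into the SparkConf, mirroring the
    // getKeys("spark") loop in the example above.
    for (String key : props.stringPropertyNames()) {
      if (key.startsWith("spark.")) {
        conf.set(key, props.getProperty(key));
      }
    }

    SparkContext sc = new SparkContext(conf);
    // sc.addJar("/path/to/extra.jar");  // ship extra jars to executors if needed
    sc.stop();
  }
}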
 
Example #6
Source File: GeoWaveRDDLoader.java    From geowave with Apache License 2.0
public static GeoWaveIndexedRDD loadIndexedRDD(
    final SparkContext sc,
    final DataStorePluginOptions storeOptions,
    final RDDOptions rddOpts,
    final NumericIndexStrategy indexStrategy) throws IOException {
  final GeoWaveRDD wrappedRDD = GeoWaveRDDLoader.loadRDD(sc, storeOptions, rddOpts);
  // Index strategy can be expensive so we will broadcast it and store it
  Broadcast<NumericIndexStrategy> broadcastStrategy = null;
  if (indexStrategy != null) {
    broadcastStrategy =
        (Broadcast<NumericIndexStrategy>) RDDUtils.broadcastIndexStrategy(sc, indexStrategy);
  }

  final GeoWaveIndexedRDD returnRDD = new GeoWaveIndexedRDD(wrappedRDD, broadcastStrategy);
  return returnRDD;
}
 
Example #7
Source File: SourceRDD.java    From beam with Apache License 2.0
public Bounded(
    SparkContext sc,
    BoundedSource<T> source,
    SerializablePipelineOptions options,
    String stepName) {
  super(sc, NIL, JavaSparkContext$.MODULE$.fakeClassTag());
  this.source = source;
  this.options = options;
  // the input parallelism is determined by Spark's scheduler backend.
  // when running on YARN/SparkDeploy it's the result of max(totalCores, 2).
  // when running on Mesos it's 8.
  // when running local it's the total number of cores (local = 1, local[N] = N,
  // local[*] = estimation of the machine's cores).
  // ** the configuration "spark.default.parallelism" takes precedence over all of the above **
  this.numPartitions = sc.defaultParallelism();
  checkArgument(this.numPartitions > 0, "Number of partitions must be greater than zero.");
  this.bundleSize = options.get().as(SparkPipelineOptions.class).getBundleSize();
  this.stepName = stepName;
  this.metricsAccum = MetricsAccumulator.getInstance();
}
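
The comment block above explains where sc.defaultParallelism() comes from; the spark.default.parallelism override is easy to verify locally with a sketch like this:

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;

public class DefaultParallelismSketch {
  public static void main(String[] args) {
    // With local[4] the scheduler would report 4 cores, but the explicit
    // spark.default.parallelism setting takes precedence.
    SparkConf conf = new SparkConf()
        .setAppName("parallelism-sketch")
        .setMaster("local[4]")
        .set("spark.default.parallelism", "8");

    SparkContext sc = new SparkContext(conf);
    System.out.println("defaultParallelism = " + sc.defaultParallelism());  // 8
    sc.stop();
  }
}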
 
Example #8
Source File: SparkTextFileBoundedSourceVertex.java    From incubator-nemo with Apache License 2.0
/**
 * Constructor.
 *
 * @param sparkContext  the spark context.
 * @param inputPath     the path of the target text file.
 * @param numPartitions the number of partitions.
 */
public SparkTextFileBoundedSourceVertex(final SparkContext sparkContext,
                                        final String inputPath,
                                        final int numPartitions) {
  this.readables = new ArrayList<>();
  final Partition[] partitions = sparkContext.textFile(inputPath, numPartitions).getPartitions();
  for (int i = 0; i < partitions.length; i++) {
    readables.add(new SparkTextFileBoundedSourceReadable(
      partitions[i],
      sparkContext.getConf(),
      i,
      inputPath,
      numPartitions));
  }
  this.estimatedSizeBytes = SizeEstimator.estimate(sparkContext.textFile(inputPath, numPartitions));
}
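
The partition probing in this constructor is plain RDD API; a minimal sketch using the public partitions() accessor (the input path is an assumption):

import org.apache.spark.Partition;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.rdd.RDD;

public class TextFilePartitionsSketch {
  public static void main(String[] args) {
    SparkContext sc = new SparkContext(
        new SparkConf().setAppName("textfile-partitions-sketch").setMaster("local[*]"));

    // Assumes a local text file exists at this path.
    RDD<String> lines = sc.textFile("data/input.txt", 4);

    // partitions() exposes the same Partition[] the example reads via
    // getPartitions(); computing it only lists the input, no job runs.
    Partition[] partitions = lines.partitions();
    System.out.println("number of partitions: " + partitions.length);

    sc.stop();
  }
}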
 
Example #9
Source File: MizoRDD.java    From mizo with Apache License 2.0
public MizoRDD(SparkContext context, IMizoRDDConfig config, ClassTag<TReturn> classTag) {
    super(context, new ArrayBuffer<>(), classTag);

    if (!Strings.isNullOrEmpty(config.logConfigPath())) {
        PropertyConfigurator.configure(config.logConfigPath());
    }

    this.config = config;
    this.regionsPaths = getRegionsPaths(config.regionDirectoriesPath());
    this.relationTypes = loadRelationTypes(config.titanConfigPath());
}
 
Example #10
Source File: ConverterFactory.java    From jpmml-sparkml with GNU Affero General Public License v3.0
static
public void checkVersion(){
	SparkContext sparkContext = SparkContext.getOrCreate();

	int[] version = parseVersion(sparkContext.version());

	if(!Arrays.equals(ConverterFactory.VERSION, version)){
		throw new IllegalArgumentException("Expected Apache Spark ML version " + formatVersion(ConverterFactory.VERSION) + ", got version " + formatVersion(version) + " (" + sparkContext.version() + ")");
	}
}
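
parseVersion and formatVersion are jpmml-sparkml helpers; the Spark call underneath is just SparkContext#version(), which returns a plain version string. A tiny sketch (the class name is illustrative):

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;

public class VersionCheckSketch {
  public static void main(String[] args) {
    SparkContext sc = SparkContext.getOrCreate(
        new SparkConf().setAppName("version-sketch").setMaster("local[*]"));

    // version() returns a string such as "2.4.8".
    String[] parts = sc.version().split("\\.");
    System.out.println("major=" + parts[0] + ", minor=" + parts[1]);

    sc.stop();
  }
}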
 
Example #11
Source File: JobFactoryTest.java    From rdf2x with Apache License 2.0
@Test
public void testGetPersistJob() throws ConfigurationException {
    Runnable job = JobFactory.getJob(new String[]{"convert", "--input.file", "test.nq", "--output.target", "Preview"});
    // stop the created Spark Context to avoid conflicts in other tests
    SparkContext.getOrCreate().stop();
    assertNotNull("Non-null write job returned from factory", job);
    assertEquals("Correct job returned from factory", ConvertJob.class, job.getClass());
}
 
Example #12
Source File: Spark.java    From tinkerpop with Apache License 2.0
public static SparkContext recreateStopped() {
    if (null == CONTEXT)
        throw new IllegalStateException("The Spark context has not been created.");
    if (!CONTEXT.isStopped())
        throw new IllegalStateException("The Spark context is not stopped.");
    CONTEXT = SparkContext.getOrCreate(CONTEXT.getConf());
    return CONTEXT;
}
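
The stop-then-recreate lifecycle that recreateStopped() guards can be reproduced with the public SparkContext API alone; a sketch that runs locally:

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;

public class ContextLifecycleSketch {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("lifecycle-sketch").setMaster("local[*]");

    SparkContext first = SparkContext.getOrCreate(conf);
    first.stop();
    System.out.println(first.isStopped());   // true

    // After stop(), getOrCreate() builds a fresh context from the same
    // configuration, which is what recreateStopped() does with getConf().
    SparkContext second = SparkContext.getOrCreate(first.getConf());
    System.out.println(second.isStopped());  // false
    second.stop();
  }
}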
 
Example #13
Source File: CassandraDependenciesJob.java    From zipkin-dependencies with Apache License 2.0
public void run() {
  long microsLower = day * 1000;
  long microsUpper = (day * 1000) + TimeUnit.DAYS.toMicros(1) - 1;

  log.info("Running Dependencies job for {}: {} ≤ Span.timestamp {}", dateStamp, microsLower,
      microsUpper);

  SparkContext sc = new SparkContext(conf);

  List<DependencyLink> links = javaFunctions(sc)
    .cassandraTable(keyspace, "traces")
    .spanBy(r -> r.getLong("trace_id"), Long.class)
    .flatMapValues(new CassandraRowsToDependencyLinks(logInitializer, microsLower, microsUpper))
    .values()
    .mapToPair(l -> Tuple2.apply(Tuple2.apply(l.parent(), l.child()), l))
    .reduceByKey((l, r) -> DependencyLink.newBuilder()
      .parent(l.parent())
      .child(l.child())
      .callCount(l.callCount() + r.callCount())
      .errorCount(l.errorCount() + r.errorCount())
      .build())
    .values()
    .collect();

  sc.stop();

  saveToCassandra(links);
}
 
Example #14
Source File: Spark.java    From tinkerpop with Apache License 2.0
public static SparkContext create(final SparkConf sparkConf) {
    if (isContextNullOrStopped()) {
        sparkConf.setAppName("Apache TinkerPop's Spark-Gremlin");
        CONTEXT = SparkContext.getOrCreate(sparkConf);
    }
    return CONTEXT;
}
 
Example #15
Source File: SparkJavaRDD.java    From incubator-nemo with Apache License 2.0
/**
 * Static method to create a SparkJavaRDD object from an text file.
 *
 * @param sparkContext  the spark context containing configurations.
 * @param minPartitions the minimum number of partitions.
 * @param inputPath     the path of the input text file.
 * @return the new SparkJavaRDD object
 */
public static SparkJavaRDD<String> of(final SparkContext sparkContext,
                                      final int minPartitions,
                                      final String inputPath) {
  final DAGBuilder<IRVertex, IREdge> builder = new DAGBuilder<>();

  final org.apache.spark.rdd.RDD<String> textRdd = sparkContext.textFile(inputPath, minPartitions);
  final int numPartitions = textRdd.getNumPartitions();
  final IRVertex textSourceVertex = new SparkTextFileBoundedSourceVertex(sparkContext, inputPath, numPartitions);
  textSourceVertex.setProperty(ParallelismProperty.of(numPartitions));
  builder.addVertex(textSourceVertex);

  return new SparkJavaRDD<>(textRdd, sparkContext, builder.buildWithoutSourceSinkCheck(), textSourceVertex);
}
 
Example #16
Source File: PrecisionMetric.java    From predictionio-template-java-ecom-recommender with Apache License 2.0
@Override
public Double calculate(SparkContext sc, Seq<Tuple2<EmptyParams, RDD<Tuple3<Query, PredictedResult, Set<String>>>>> qpas) {
    List<Tuple2<EmptyParams, RDD<Tuple3<Query, PredictedResult, Set<String>>>>> sets = JavaConversions.seqAsJavaList(qpas);
    List<Double> allSetResults = new ArrayList<>();

    for (Tuple2<EmptyParams, RDD<Tuple3<Query, PredictedResult, Set<String>>>> set : sets) {
        List<Double> setResults = set._2().toJavaRDD().map(new Function<Tuple3<Query, PredictedResult, Set<String>>, Double>() {
            @Override
            public Double call(Tuple3<Query, PredictedResult, Set<String>> qpa) throws Exception {
                Set<String> predicted = new HashSet<>();
                for (ItemScore itemScore : qpa._2().getItemScores()) {
                    predicted.add(itemScore.getItemEntityId());
                }
                Set<String> intersection = new HashSet<>(predicted);
                intersection.retainAll(qpa._3());

                return 1.0 * intersection.size() / qpa._2().getItemScores().size();
            }
        }).collect();

        allSetResults.addAll(setResults);
    }
    double sum = 0.0;
    for (Double value : allSetResults) sum += value;

    return sum / allSetResults.size();
}
 
Example #17
Source File: Model.java    From predictionio-template-java-ecom-recommender with Apache License 2.0
public static Model load(String id, Params params, SparkContext sc) {
    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
    JavaPairRDD<Integer, double[]> userFeatures = JavaPairRDD.<Integer, double[]>fromJavaRDD(
            jsc.<Tuple2<Integer, double[]>>objectFile("/tmp/" + id + "/userFeatures"));
    JavaPairRDD<Integer, Tuple2<String, double[]>> indexItemFeatures =
            JavaPairRDD.<Integer, Tuple2<String, double[]>>fromJavaRDD(
                    jsc.<Tuple2<Integer, Tuple2<String, double[]>>>objectFile("/tmp/" + id + "/indexItemFeatures"));
    JavaPairRDD<String, Integer> userIndex = JavaPairRDD.<String, Integer>fromJavaRDD(
            jsc.<Tuple2<String, Integer>>objectFile("/tmp/" + id + "/userIndex"));
    JavaPairRDD<String, Integer> itemIndex = JavaPairRDD.<String, Integer>fromJavaRDD(
            jsc.<Tuple2<String, Integer>>objectFile("/tmp/" + id + "/itemIndex"));
    JavaRDD<ItemScore> itemPopularityScore = jsc.objectFile("/tmp/" + id + "/itemPopularityScore");
    Map<String, Item> items = jsc.<Map<String, Item>>objectFile("/tmp/" + id + "/items").collect().get(0);

    logger.info("loaded model");
    return new Model(userFeatures, indexItemFeatures, userIndex, itemIndex, itemPopularityScore, items);
}
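
The load path is built on JavaSparkContext.fromSparkContext plus objectFile round-trips. A self-contained sketch of that mechanism (the path is a placeholder, not the template's layout):

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class ObjectFileSketch {
  public static void main(String[] args) {
    SparkContext sc = new SparkContext(
        new SparkConf().setAppName("objectfile-sketch").setMaster("local[*]"));
    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);

    // Round-trip an RDD through saveAsObjectFile/objectFile, the same
    // mechanism the example uses to persist and reload model parts.
    String path = "/tmp/objectfile-sketch-" + System.nanoTime();
    JavaRDD<Integer> numbers = jsc.parallelize(Arrays.asList(1, 2, 3));
    numbers.saveAsObjectFile(path);

    JavaRDD<Integer> reloaded = jsc.<Integer>objectFile(path);
    System.out.println(reloaded.collect());

    sc.stop();
  }
}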
 
Example #18
Source File: SparkJavaRDD.java    From incubator-nemo with Apache License 2.0
/**
 * Constructor with Spark source RDD.
 *
 * @param sparkRDD     the Spark source rdd to wrap.
 * @param sparkContext the Spark context in the wrapped rdd.
 * @param dag          the IR DAG in construction.
 * @param lastVertex   the last vertex of the DAG in construction.
 */
SparkJavaRDD(final org.apache.spark.rdd.RDD<T> sparkRDD,
             final SparkContext sparkContext,
             final DAG<IRVertex, IREdge> dag,
             final IRVertex lastVertex) {
  super(sparkRDD, ClassTag$.MODULE$.apply(Object.class));

  this.rdd = new RDD<>(sparkContext, dag, lastVertex, Option.apply(sparkRDD), ClassTag$.MODULE$.apply(Object.class));
}
 
Example #19
Source File: MizoBuilder.java    From mizo with Apache License 2.0
public MizoRDD<MizoEdge> edgesRDD(SparkContext sc) {
    return new MizoRDD<MizoEdge>(sc, this, ClassManifestFactory.classType(MizoEdge.class)) {
        @Override
        public scala.collection.Iterator<MizoEdge> createRegionIterator(Iterator<IMizoRelationParser> relationsIterator) {
            return new MizoEdgesIterator(relationsIterator, this.config);
        }
    };
}
 
Example #20
Source File: RDDUtils.java    From geowave with Apache License 2.0
/**
 * Translate a set of objects in a JavaRDD to a provided type and push to GeoWave
 *
 * @throws IOException
 */
private static void writeToGeoWave(
    final SparkContext sc,
    final Index index,
    final DataStorePluginOptions outputStoreOptions,
    final DataTypeAdapter adapter,
    final JavaRDD<SimpleFeature> inputRDD) throws IOException {

  // setup the configuration and the output format
  final Configuration conf = new org.apache.hadoop.conf.Configuration(sc.hadoopConfiguration());

  GeoWaveOutputFormat.setStoreOptions(conf, outputStoreOptions);
  GeoWaveOutputFormat.addIndex(conf, index);
  GeoWaveOutputFormat.addDataAdapter(conf, adapter);

  // create the job
  final Job job = new Job(conf);
  job.setOutputKeyClass(GeoWaveOutputKey.class);
  job.setOutputValueClass(SimpleFeature.class);
  job.setOutputFormatClass(GeoWaveOutputFormat.class);

  // broadcast string names
  final ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);
  final Broadcast<String> typeName = sc.broadcast(adapter.getTypeName(), stringTag);
  final Broadcast<String> indexName = sc.broadcast(index.getName(), stringTag);

  // map to a pair containing the output key and the output value
  inputRDD.mapToPair(
      feat -> new Tuple2<>(
          new GeoWaveOutputKey(typeName.value(), indexName.value()),
          feat)).saveAsNewAPIHadoopDataset(job.getConfiguration());
}
 
Example #21
Source File: ParameterAveragingTrainingWorkerStats.java    From deeplearning4j with Apache License 2.0
@Override
public void exportStatFiles(String outputPath, SparkContext sc) throws IOException {
    String d = DEFAULT_DELIMITER;

    //Broadcast get time:
    StatsUtils.exportStats(parameterAveragingWorkerBroadcastGetValueTimeMs, outputPath,
                    FILENAME_BROADCAST_GET_STATS, d, sc);

    //Network init time:
    StatsUtils.exportStats(parameterAveragingWorkerInitTimeMs, outputPath, FILENAME_INIT_STATS, d, sc);

    //Network fit time:
    StatsUtils.exportStats(parameterAveragingWorkerFitTimesMs, outputPath, FILENAME_FIT_STATS, d, sc);
}
 
Example #22
Source File: RDDUtils.java    From geowave with Apache License 2.0
public static Broadcast<? extends NumericIndexStrategy> broadcastIndexStrategy(
    final SparkContext sc,
    final NumericIndexStrategy indexStrategy) {
  final ClassTag<NumericIndexStrategy> indexClassTag =
      scala.reflect.ClassTag$.MODULE$.apply(indexStrategy.getClass());
  final Broadcast<NumericIndexStrategy> broadcastStrategy =
      sc.broadcast(indexStrategy, indexClassTag);
  return broadcastStrategy;
}
 
Example #23
Source File: SparkSessionUtil.java    From jpmml-sparkml with GNU Affero General Public License v3.0
static
public SparkSession createSparkSession(){
	SparkSession.Builder builder = SparkSession.builder()
		.appName("test")
		.master("local[1]")
		.config("spark.ui.enabled", false);

	SparkSession sparkSession = builder.getOrCreate();

	SparkContext sparkContext = sparkSession.sparkContext();
	sparkContext.setLogLevel("ERROR");

	return sparkSession;
}
 
Example #24
Source File: SparkTestBase.java    From spark-transformers with Apache License 2.0
@Before
public void setup() {
    SparkConf sparkConf = new SparkConf();
    String master = "local[2]";
    sparkConf.setMaster(master);
    sparkConf.setAppName("Local Spark Unit Test");
    sc = new JavaSparkContext(new SparkContext(sparkConf));
    sqlContext = new SQLContext(sc);
}
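
A matching teardown usually accompanies this setup so repeated tests do not collide on an already running context; a minimal sketch, assuming JUnit 4:

import org.apache.spark.api.java.JavaSparkContext;
import org.junit.After;

public abstract class SparkTestTeardownSketch {
  protected transient JavaSparkContext sc;

  @After
  public void tearDown() {
    // Only one SparkContext may be active per JVM, so stop it between
    // tests and drop the reference.
    if (sc != null) {
      sc.stop();
      sc = null;
    }
  }
}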
 
Example #25
Source File: DataframeCheckpointApp.java    From net.jgp.labs.spark with Apache License 2.0
private void start() {
  SparkConf conf = new SparkConf()
      .setAppName("Checkpoint")
      .setMaster("local[*]");
  SparkContext sparkContext = new SparkContext(conf);

  // We need to specify where Spark will save the checkpoint file. It can
  // be an HDFS location.
  sparkContext.setCheckpointDir("/tmp");
  SparkSession spark = SparkSession.builder()
      .appName("Checkpoint")
      .master("local[*]")
      .getOrCreate();

  String filename = "data/tuple-data-file.csv";
  Dataset<Row> df1 =
      spark.read().format("csv").option("inferSchema", "true")
          .option("header", "false")
          .load(filename);
  System.out.println("DF #1 - step #1: simple dump of the dataframe");
  df1.show();

  System.out.println("DF #2 - step #2: same as DF #1 - step #1");
  Dataset<Row> df2 = df1.checkpoint(false);
  df2.show();

  df1 = df1.withColumn("x", df1.col("_c0"));
  System.out.println(
      "DF #1 - step #2: new column x, which is the same as _c0");
  df1.show();

  System.out.println("DF #2 - step #2: no operation was done on df2");
  df2.show();
}
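
setCheckpointDir also backs RDD-level checkpointing, where checkpoint() must be requested before an action materializes the data; a short sketch (the directory is a placeholder):

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class RddCheckpointSketch {
  public static void main(String[] args) {
    JavaSparkContext jsc = new JavaSparkContext(
        new SparkConf().setAppName("rdd-checkpoint-sketch").setMaster("local[*]"));
    jsc.setCheckpointDir("/tmp/checkpoints");

    JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4));
    rdd.checkpoint();                          // marked; written after the next action
    System.out.println(rdd.count());           // 4
    System.out.println(rdd.isCheckpointed());  // true

    jsc.stop();
  }
}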
 
Example #26
Source File: TestStreamingStep.java    From envelope with Apache License 2.0
public JavaRDD<String> generateRDD() {
  Random values = new Random();
  values.setSeed(System.currentTimeMillis());
  List<String> list = Lists.newLinkedList();
  for (int i = 0; i < batchSize; i++) {
    list.add(String.valueOf(values.nextLong()));
  }
  SparkContext sc = Contexts.getSparkSession().sparkContext();
  JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
  return jsc.parallelize(list, this.partitions);
}
 
Example #27
Source File: RDD.java    From nemo with Apache License 2.0
/**
 * Constructor.
 * @param sparkContext spark context containing configurations.
 * @param dag the current DAG.
 */
private RDD(final SparkContext sparkContext, final DAG<IRVertex, IREdge> dag) {
  super(sparkContext, null, ClassTag$.MODULE$.apply((Class<T>) Object.class));

  this.loopVertexStack = new Stack<>();
  this.dag = dag;
}
 
Example #28
Source File: SparkFrontendUtils.java    From nemo with Apache License 2.0
/**
 * Derive Spark serializer from a spark context.
 * @param sparkContext spark context to derive the serializer from.
 * @return the serializer.
 */
public static Serializer deriveSerializerFrom(final SparkContext sparkContext) {
  if (sparkContext.conf().get("spark.serializer", "")
      .equals("org.apache.spark.serializer.KryoSerializer")) {
    return new KryoSerializer(sparkContext.conf());
  } else {
    return new JavaSerializer(sparkContext.conf());
  }
}
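
The same serializer selection can be exercised against a bare SparkConf without starting a context; a sketch (the class name is illustrative):

import org.apache.spark.SparkConf;
import org.apache.spark.serializer.JavaSerializer;
import org.apache.spark.serializer.KryoSerializer;
import org.apache.spark.serializer.Serializer;

public class SerializerSelectionSketch {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf()
        .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");

    // The same conf.get(key, default) lookup the example performs on
    // sparkContext.conf().
    String serializerClass = conf.get("spark.serializer", "");
    Serializer serializer =
        serializerClass.equals("org.apache.spark.serializer.KryoSerializer")
            ? new KryoSerializer(conf)
            : new JavaSerializer(conf);

    System.out.println(serializer.getClass().getSimpleName());
  }
}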
 
Example #29
Source File: JavaRDD.java    From nemo with Apache License 2.0
/**
 * Static method to create a JavaRDD object from an iterable object.
 * @param sparkContext spark context containing configurations.
 * @param initialData initial data.
 * @param parallelism parallelism information.
 * @param <T> type of the resulting object.
 * @return the new JavaRDD object.
 */
public static <T> JavaRDD<T> of(final SparkContext sparkContext,
                                final Iterable<T> initialData, final Integer parallelism) {
  final DAGBuilder<IRVertex, IREdge> builder = new DAGBuilder<>();

  final IRVertex initializedSourceVertex = new InitializedSourceVertex<>(initialData);
  initializedSourceVertex.setProperty(ParallelismProperty.of(parallelism));
  builder.addVertex(initializedSourceVertex);

  return new JavaRDD<>(sparkContext, builder.buildWithoutSourceSinkCheck(), initializedSourceVertex);
}
 
Example #30
Source File: JavaRDD.java    From nemo with Apache License 2.0
/**
 * Constructor.
 * @param sparkContext spark context containing configurations.
 * @param dag the current DAG.
 * @param lastVertex last vertex added to the builder.
 */
JavaRDD(final SparkContext sparkContext, final DAG<IRVertex, IREdge> dag, final IRVertex lastVertex) {
  // TODO #366: resolve while implementing scala RDD.
  super(RDD.of(sparkContext), ClassTag$.MODULE$.apply((Class<T>) Object.class));

  this.loopVertexStack = new Stack<>();
  this.sparkContext = sparkContext;
  this.dag = dag;
  this.lastVertex = lastVertex;
  this.serializer = SparkFrontendUtils.deriveSerializerFrom(sparkContext);
}