Java Code Examples for org.apache.spark.SparkContext#broadcast()

The following examples show how to use org.apache.spark.SparkContext#broadcast(). Each example is drawn from an open-source project; its source file, project, and license are noted above the code.
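Before the project examples, here is a minimal, self-contained sketch of the call pattern (the app name, master setting, and broadcast value are illustrative, not taken from any project below). SparkContext#broadcast is the Scala API, so Java callers must pass a ClassTag for the value's type explicitly; every example on this page builds that tag with scala.reflect.ClassTag$.MODULE$.apply(...).

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.broadcast.Broadcast;
import scala.reflect.ClassTag;

public class BroadcastSketch {
    public static void main(String[] args) {
        // Illustrative local context; real applications configure this elsewhere.
        SparkConf conf = new SparkConf().setAppName("broadcast-sketch").setMaster("local[*]");
        SparkContext sc = new SparkContext(conf);

        // Build the ClassTag by hand, exactly as the examples below do.
        ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);

        // The value is shipped to each executor node once, not once per task.
        Broadcast<String> greeting = sc.broadcast("hello", stringTag);

        // Tasks read the shared value with value().
        System.out.println(greeting.value());

        sc.stop();
    }
}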
Example 1
Source File: SparkMaster.java    From GeoTriples with Apache License 2.0
/**
 * Convert the input Dataset into RDF triples and store the results.
 * The conversion takes place per partition using the mapPartitions Spark transformation.
 * @param mapping_list list of TripleMaps
 */
private void convert_partition(ArrayList<TriplesMap> mapping_list){
    SparkContext sc = SparkContext.getOrCreate();

    // Broadcast the TripleMaps together with the input headers once,
    // so each partition can rebuild the converter locally.
    Pair<ArrayList<TriplesMap>, List<String>> transformation_info = new Pair<>(mapping_list, Arrays.asList(reader.getHeaders()));
    ClassTag<Pair<ArrayList<TriplesMap>, List<String>>> classTag_pair = scala.reflect.ClassTag$.MODULE$.apply(Pair.class);
    Broadcast<Pair<ArrayList<TriplesMap>, List<String>>> bd_info = sc.broadcast(transformation_info, classTag_pair);

    rowRDD
        .mapPartitions(
        (Iterator<Row> rows_iter) -> {
            // Unwrap the broadcast pair and build one converter per partition.
            ArrayList<TriplesMap> p_mapping_list = bd_info.value().getKey();
            List<String> p_header = bd_info.value().getValue();
            RML_Converter rml_converter = new RML_Converter(p_mapping_list, p_header);
            rml_converter.start();
            rml_converter.registerFunctions();
            Iterator<String> triples = rml_converter.convertPartition(rows_iter);

            rml_converter.stop();
            return triples;
        })
        .saveAsTextFile(outputDir);
}
 
Example 2
Source File: SparkMaster.java    From GeoTriples with Apache License 2.0
/**
 * Convert the input Dataset into RDF triples and store the results.
 * The conversion takes place per row using the map Spark transformation.
 * @param mapping_list list of TripleMaps
 */
private void convert_row(ArrayList<TriplesMap> mapping_list){

    SparkContext sc = SparkContext.getOrCreate();

    // Build the converter on the driver and broadcast the instance itself
    // (so it must be serializable), rather than its inputs as in Example 1.
    RML_Converter rml_converter = new RML_Converter(mapping_list, Arrays.asList(reader.getHeaders()));
    ClassTag<RML_Converter> classTagRML_Converter = scala.reflect.ClassTag$.MODULE$.apply(RML_Converter.class);
    Broadcast<RML_Converter> bc_converter = sc.broadcast(rml_converter, classTagRML_Converter);

    // Broadcast the registry of available functions as well, so executors
    // can restore the static FunctionFactory state before converting.
    ClassTag<HashMap<URI, Function>> classTag_hashMap = scala.reflect.ClassTag$.MODULE$.apply(HashMap.class);
    Broadcast<HashMap<URI, Function>> bc_functionsHashMap = sc.broadcast(FunctionFactory.availableFunctions, classTag_hashMap);
    rowRDD
        .map((row) ->  {
            FunctionFactory.availableFunctions = bc_functionsHashMap.value();
            return bc_converter.value().convertRow(row);
        } )
        .saveAsTextFile(outputDir);
}
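Both GeoTriples examples call broadcast on the raw SparkContext, which is why the explicit ClassTag is needed. As a side note, wrapping the context in a JavaSparkContext gives a broadcast overload that needs no tag; a minimal sketch under that substitution (names illustrative):

import java.util.Arrays;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;

public class JavaBroadcastSketch {
    public static void main(String[] args) {
        // Illustrative local context.
        SparkConf conf = new SparkConf().setAppName("java-broadcast-sketch").setMaster("local[*]");
        JavaSparkContext jsc = new JavaSparkContext(conf);

        // JavaSparkContext#broadcast infers the type; no ClassTag argument.
        List<String> headers = Arrays.asList("id", "name");
        Broadcast<List<String>> bcHeaders = jsc.broadcast(headers);

        System.out.println(bcHeaders.value());
        jsc.stop();
    }
}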
 
Example 3
Source File: DeepRDD.java    From deep-spark with Apache License 2.0
public DeepRDD(SparkContext sc, S config) {
    super(sc, scala.collection.Seq$.MODULE$.empty(), ClassTag$.MODULE$.<T>apply(config
            .getEntityClass()));
    config.setRddId(id());
    // Broadcast the RDD configuration so every task reads a shared,
    // node-local copy instead of serializing it with each closure.
    this.config =
            sc.broadcast(config, ClassTag$.MODULE$
                    .<S>apply(config.getClass()));
}
 
Example 4
Source File: RDDUtils.java    From geowave with Apache License 2.0
/**
 * Translate a set of objects in a JavaRDD to a provided type and push to GeoWave
 *
 * @throws IOException
 */
private static void writeToGeoWave(
    final SparkContext sc,
    final Index index,
    final DataStorePluginOptions outputStoreOptions,
    final DataTypeAdapter adapter,
    final JavaRDD<SimpleFeature> inputRDD) throws IOException {

  // setup the configuration and the output format
  final Configuration conf = new org.apache.hadoop.conf.Configuration(sc.hadoopConfiguration());

  GeoWaveOutputFormat.setStoreOptions(conf, outputStoreOptions);
  GeoWaveOutputFormat.addIndex(conf, index);
  GeoWaveOutputFormat.addDataAdapter(conf, adapter);

  // create the job
  final Job job = new Job(conf);
  job.setOutputKeyClass(GeoWaveOutputKey.class);
  job.setOutputValueClass(SimpleFeature.class);
  job.setOutputFormatClass(GeoWaveOutputFormat.class);

  // broadcast string names
  final ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);
  final Broadcast<String> typeName = sc.broadcast(adapter.getTypeName(), stringTag);
  final Broadcast<String> indexName = sc.broadcast(index.getName(), stringTag);

  // map to a pair containing the output key and the output value
  inputRDD.mapToPair(
      feat -> new Tuple2<>(
          new GeoWaveOutputKey(typeName.value(), indexName.value()),
          feat)).saveAsNewAPIHadoopDataset(job.getConfiguration());
}
 
Example 5
Source File: RDDUtils.java    From geowave with Apache License 2.0
public static void writeRasterToGeoWave(
    final SparkContext sc,
    final Index index,
    final DataStorePluginOptions outputStoreOptions,
    final RasterDataAdapter adapter,
    final JavaRDD<GridCoverage> inputRDD) throws IOException {

  // setup the configuration and the output format
  final Configuration conf = new org.apache.hadoop.conf.Configuration(sc.hadoopConfiguration());

  GeoWaveOutputFormat.setStoreOptions(conf, outputStoreOptions);
  GeoWaveOutputFormat.addIndex(conf, index);
  GeoWaveOutputFormat.addDataAdapter(conf, adapter);

  // create the job
  final Job job = new Job(conf);
  job.setOutputKeyClass(GeoWaveOutputKey.class);
  job.setOutputValueClass(GridCoverage.class);
  job.setOutputFormatClass(GeoWaveOutputFormat.class);

  // broadcast string names
  final ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);
  final Broadcast<String> typeName = sc.broadcast(adapter.getTypeName(), stringTag);
  final Broadcast<String> indexName = sc.broadcast(index.getName(), stringTag);

  // map to a pair containing the output key and the output value
  inputRDD.mapToPair(
      gridCoverage -> new Tuple2<>(
          new GeoWaveOutputKey(typeName.value(), indexName.value()),
          gridCoverage)).saveAsNewAPIHadoopDataset(job.getConfiguration());
}
 
Example 6
Source File: RDDUtils.java    From geowave with Apache License 2.0
public static Broadcast<? extends NumericIndexStrategy> broadcastIndexStrategy(
    final SparkContext sc,
    final NumericIndexStrategy indexStrategy) {
  final ClassTag<NumericIndexStrategy> indexClassTag =
      scala.reflect.ClassTag$.MODULE$.apply(indexStrategy.getClass());
  final Broadcast<NumericIndexStrategy> broadcastStrategy =
      sc.broadcast(indexStrategy, indexClassTag);
  return broadcastStrategy;
}
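The helper returns the Broadcast without showing a consumer, so here is a hedged usage fragment (the caller, the JavaRDD of coordinate arrays, and the loop body are assumptions for illustration, not geowave code):

// Hypothetical caller; `points` and its element type are illustrative.
static void useBroadcastStrategy(
    final SparkContext sc,
    final NumericIndexStrategy indexStrategy,
    final JavaRDD<double[]> points) {
  final Broadcast<? extends NumericIndexStrategy> bcStrategy =
      RDDUtils.broadcastIndexStrategy(sc, indexStrategy);
  points.foreach(point -> {
    // value() returns the node-local shared instance for every task,
    // instead of re-serializing the strategy into each closure.
    final NumericIndexStrategy strategy = bcStrategy.value();
    // ... index `point` with `strategy` ...
  });
}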