Java Code Examples for org.apache.spark.api.java.JavaSparkContext#fromSparkContext()

The following examples show how to use org.apache.spark.api.java.JavaSparkContext#fromSparkContext(). The original project and source file for each example are noted above its code.
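
Across the examples below, the pattern is the same: obtain the Scala SparkContext (from a SparkSession, from SparkContext.getOrCreate(), or from a new SparkConf) and wrap it with JavaSparkContext.fromSparkContext() to work with the Java API. The following minimal sketch illustrates only that pattern; the class name, master URL, and app name are placeholders, not taken from any of the projects referenced here.

import java.util.Arrays;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

public class FromSparkContextSketch {
  public static void main(String[] args) {
    // Master and app name are illustrative values for a local run.
    SparkSession spark = SparkSession.builder()
        .master("local[2]")
        .appName("FromSparkContextSketch")
        .getOrCreate();

    // Wrap the underlying Scala SparkContext with the Java-friendly API.
    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());

    // Use the JavaSparkContext as usual, e.g. to parallelize a local collection.
    JavaRDD<Integer> numbers = jsc.parallelize(Arrays.asList(1, 2, 3, 4));
    System.out.println("Sum: " + numbers.reduce((a, b) -> a + b));

    spark.stop();
  }
}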
Example 1
Source File: Accumulator.java    From sparkResearch with Apache License 2.0
public static void main(String[] args) {
    SparkSession sparkSession = SparkSession.builder()
            .master("local[4]").appName("AttackFind").getOrCreate();
    // Initialize the JavaSparkContext
    JavaSparkContext javaSparkContext = JavaSparkContext.fromSparkContext(sparkSession.sparkContext());
    // Set the log output level
    javaSparkContext.setLogLevel("ERROR");
    // Create an RDD
    JavaRDD<String> rdd = javaSparkContext.parallelize(Arrays.asList(JavaBean.origin_id, JavaBean.asset_name)).cache();

    AttackAccumulator attackAccumulator = new AttackAccumulator();
    // Register the accumulator
    javaSparkContext.sc().register(attackAccumulator, "attack_count");
    // Generate a random number as the value
    JavaPairRDD<String, String> javaPairRDD = rdd.mapToPair((PairFunction<String, String, String>) s -> {
        Integer random = new Random().nextInt(10);
        return new Tuple2<>(s, s + ":" + random);
    });

    javaPairRDD.foreach((VoidFunction<Tuple2<String, String>>) tuple2 -> {
        attackAccumulator.add(tuple2._2);
    });
    System.out.println(attackAccumulator.value());
}
 
Example 2
Source File: KMeansRunner.java    From geowave with Apache License 2.0
private void initContext() {
  if (session == null) {
    String jar = "";
    try {
      jar =
          KMeansRunner.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath();
      if (!FilenameUtils.isExtension(jar.toLowerCase(), "jar")) {
        jar = "";
      }
    } catch (final URISyntaxException e) {
      LOGGER.error("Unable to set jar location in spark configuration", e);
    }

    session = GeoWaveSparkConf.createSessionFromParams(appName, master, host, jar);

    jsc = JavaSparkContext.fromSparkContext(session.sparkContext());
  }
}
 
Example 3
Source File: KDERunner.java    From geowave with Apache License 2.0
private void initContext() {
  if (session == null) {
    String jar = "";
    try {
      jar = KDERunner.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath();
      if (!FilenameUtils.isExtension(jar.toLowerCase(), "jar")) {
        jar = "";
      }
    } catch (final URISyntaxException e) {
      LOGGER.error("Unable to set jar location in spark configuration", e);
    }

    session = GeoWaveSparkConf.createSessionFromParams(appName, master, host, jar);

    jsc = JavaSparkContext.fromSparkContext(session.sparkContext());
  }
}
 
Example 4
Source File: RasterTileResizeSparkRunner.java    From geowave with Apache License 2.0
private void initContext() {
  if (session == null) {
    String jar = "";
    try {
      jar =
          RasterTileResizeSparkRunner.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath();
      if (!FilenameUtils.isExtension(jar.toLowerCase(), "jar")) {
        jar = "";
      }
    } catch (final URISyntaxException e) {
      LOGGER.error("Unable to set jar location in spark configuration", e);
    }

    session = GeoWaveSparkConf.createSessionFromParams(appName, master, host, jar);

    jsc = JavaSparkContext.fromSparkContext(session.sparkContext());
  }
}
 
Example 5
Source File: SparkSegmentUriPushJob.java    From incubator-pinot with Apache License 2.0
@Override
public void run()
    throws Exception {
  if (!_enableParallelPush) {
    super.run();
  } else {
    List<Path> tarFilePaths = getDataFilePaths(_segmentPattern);
    retainRecentFiles(tarFilePaths, _lookBackPeriod);
    List<String> segmentUris = new ArrayList<>(tarFilePaths.size());
    for (Path tarFilePath : tarFilePaths) {
      segmentUris.add(_segmentUriPrefix + tarFilePath.toUri().getRawPath() + _segmentUriSuffix);
    }
    JavaSparkContext sparkContext = JavaSparkContext.fromSparkContext(SparkContext.getOrCreate());
    if (_pushJobParallelism == -1) {
      _pushJobParallelism = segmentUris.size();
    }
    JavaRDD<String> pathRDD = sparkContext.parallelize(segmentUris, _pushJobParallelism);
    pathRDD.foreach(segmentUri -> {
      try (ControllerRestApi controllerRestApi = getControllerRestApi()) {
        controllerRestApi.sendSegmentUris(Arrays.asList(segmentUri));
      }
    });
  }
}
 
Example 6
Source File: ParDoTranslatorBatch.java    From beam with Apache License 2.0
private static SideInputBroadcast createBroadcastSideInputs(
    List<PCollectionView<?>> sideInputs, TranslationContext context) {
  JavaSparkContext jsc =
      JavaSparkContext.fromSparkContext(context.getSparkSession().sparkContext());

  SideInputBroadcast sideInputBroadcast = new SideInputBroadcast();
  for (PCollectionView<?> sideInput : sideInputs) {
    Coder<? extends BoundedWindow> windowCoder =
        sideInput.getPCollection().getWindowingStrategy().getWindowFn().windowCoder();

    Coder<WindowedValue<?>> windowedValueCoder =
        (Coder<WindowedValue<?>>)
            (Coder<?>)
                WindowedValue.getFullCoder(sideInput.getPCollection().getCoder(), windowCoder);
    Dataset<WindowedValue<?>> broadcastSet = context.getSideInputDataSet(sideInput);
    List<WindowedValue<?>> valuesList = broadcastSet.collectAsList();
    List<byte[]> codedValues = new ArrayList<>();
    for (WindowedValue<?> v : valuesList) {
      codedValues.add(CoderHelpers.toByteArray(v, windowedValueCoder));
    }

    sideInputBroadcast.add(
        sideInput.getTagInternal().getId(), jsc.broadcast(codedValues), windowedValueCoder);
  }
  return sideInputBroadcast;
}
 
Example 7
Source File: SparkStructuredStreamingRunner.java    From beam with Apache License 2.0
private TranslationContext translatePipeline(Pipeline pipeline) {
  PipelineTranslator.detectTranslationMode(pipeline, options);
  PipelineTranslator.replaceTransforms(pipeline, options);
  PipelineTranslator.prepareFilesToStageForRemoteClusterExecution(options);
  PipelineTranslator pipelineTranslator =
      options.isStreaming()
          ? new PipelineTranslatorStreaming(options)
          : new PipelineTranslatorBatch(options);

  final JavaSparkContext jsc =
      JavaSparkContext.fromSparkContext(
          pipelineTranslator.getTranslationContext().getSparkSession().sparkContext());
  initAccumulators(options, jsc);

  pipelineTranslator.translate(pipeline);
  return pipelineTranslator.getTranslationContext();
}
 
Example 8
Source File: BigQueryWordCountToBigQuery.java    From spark-on-k8s-gcp-examples with Apache License 2.0
public static void main(String[] args) throws IOException {
  if (args.length != 2) {
    System.err.println("Usage: BigQueryWordCountToBigQuery <fully-qualified input table id> " +
        "<fully-qualified output table id>");
    System.exit(1);
  }

  Configuration conf = null;
  Path tmpDirPath = null;
  try (JavaSparkContext javaSparkContext = JavaSparkContext
      .fromSparkContext(SparkContext.getOrCreate())) {
    conf = configure(javaSparkContext.hadoopConfiguration(), args);
    tmpDirPath = new Path(conf.get(BigQueryConfiguration.TEMP_GCS_PATH_KEY)).getParent();
    deleteTmpDir(tmpDirPath, conf);
    compute(javaSparkContext, conf);
  } finally {
    if (conf != null && tmpDirPath != null) {
      deleteTmpDir(tmpDirPath, conf);
    }
  }
}
 
Example 9
Source File: BroadCastParam.java    From sparkResearch with Apache License 2.0
/**
 * Broadcast variable test
 * @param args
 */
public static void main(String[] args) {
    SparkSession sparkSession = SparkSession.builder()
            .master("local[4]").appName("AttackFind").getOrCreate();
    // Initialize the JavaSparkContext
    JavaSparkContext javaSparkContext = JavaSparkContext.fromSparkContext(sparkSession.sparkContext());
    // Assume a list here to be used as the broadcast variable
    // As mentioned before, broadcast variables are read-only
    final List<String> broadcastList = Arrays.asList("190099HJLL","98392QUEYY","561788LLKK");
    // Set up the broadcast variable and broadcast it
    final Broadcast<List<String>> broadcast = javaSparkContext.broadcast(broadcastList);
    // Define the data
    JavaPairRDD<String,String> pairRDD = javaSparkContext.parallelizePairs(Arrays.asList(new Tuple2<>("000", "000")));
    JavaPairRDD<String,String> resultPairRDD = pairRDD.filter((Function<Tuple2<String, String>, Boolean>) v1 -> broadcast.value().contains(v1._2));
    resultPairRDD.foreach((VoidFunction<Tuple2<String, String>>) System.out::println);
}
 
Example 10
Source File: TestStreamingStep.java    From envelope with Apache License 2.0
public JavaRDD<String> generateRDD() {
  Random values = new Random();
  values.setSeed(System.currentTimeMillis());
  List<String> list = Lists.newLinkedList();
  for (int i = 0; i < batchSize; i++) {
    list.add(String.valueOf(values.nextLong()));
  }
  SparkContext sc = Contexts.getSparkSession().sparkContext();
  JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
  return jsc.parallelize(list, this.partitions);
}
 
Example 11
Source File: JavaSVDExample.java    From SparkDemo with MIT License
public static void main(String[] args) {
  SparkConf conf = new SparkConf().setAppName("SVD Example");
  SparkContext sc = new SparkContext(conf);
  JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);

  // $example on$
  double[][] array = {{1.12, 2.05, 3.12}, {5.56, 6.28, 8.94}, {10.2, 8.0, 20.5}};
  LinkedList<Vector> rowsList = new LinkedList<>();
  for (int i = 0; i < array.length; i++) {
    Vector currentRow = Vectors.dense(array[i]);
    rowsList.add(currentRow);
  }
  JavaRDD<Vector> rows = jsc.parallelize(rowsList);

  // Create a RowMatrix from JavaRDD<Vector>.
  RowMatrix mat = new RowMatrix(rows.rdd());

  // Compute the top 3 singular values and corresponding singular vectors.
  SingularValueDecomposition<RowMatrix, Matrix> svd = mat.computeSVD(3, true, 1.0E-9d);
  RowMatrix U = svd.U();
  Vector s = svd.s();
  Matrix V = svd.V();
  // $example off$
  Vector[] collectPartitions = (Vector[]) U.rows().collect();
  System.out.println("U factor is:");
  for (Vector vector : collectPartitions) {
    System.out.println("\t" + vector);
  }
  System.out.println("Singular values are: " + s);
  System.out.println("V factor is:\n" + V);

  jsc.stop();
}
 
Example 12
Source File: Model.java    From predictionio-template-java-ecom-recommender with Apache License 2.0
public static Model load(String id, Params params, SparkContext sc) {
    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
    JavaPairRDD<Integer, double[]> userFeatures = JavaPairRDD.<Integer, double[]>fromJavaRDD(jsc.<Tuple2<Integer, double[]>>objectFile("/tmp/" + id + "/userFeatures"));
    JavaPairRDD<Integer, Tuple2<String, double[]>> indexItemFeatures = JavaPairRDD.<Integer, Tuple2<String, double[]>>fromJavaRDD(jsc.<Tuple2<Integer, Tuple2<String, double[]>>>objectFile("/tmp/" + id + "/indexItemFeatures"));
    JavaPairRDD<String, Integer> userIndex = JavaPairRDD.<String, Integer>fromJavaRDD(jsc.<Tuple2<String, Integer>>objectFile("/tmp/" + id + "/userIndex"));
    JavaPairRDD<String, Integer> itemIndex = JavaPairRDD.<String, Integer>fromJavaRDD(jsc.<Tuple2<String, Integer>>objectFile("/tmp/" + id + "/itemIndex"));
    JavaRDD<ItemScore> itemPopularityScore = jsc.objectFile("/tmp/" + id + "/itemPopularityScore");
    Map<String, Item> items = jsc.<Map<String, Item>>objectFile("/tmp/" + id + "/items").collect().get(0);

    logger.info("loaded model");
    return new Model(userFeatures, indexItemFeatures, userIndex, itemIndex, itemPopularityScore, items);
}
 
Example 13
Source File: AbstractSparkLayer.java    From oryx with Apache License 2.0
protected final JavaStreamingContext buildStreamingContext() {
  log.info("Starting SparkContext with interval {} seconds", generationIntervalSec);

  SparkConf sparkConf = new SparkConf();

  // Only for tests, really
  if (sparkConf.getOption("spark.master").isEmpty()) {
    log.info("Overriding master to {} for tests", streamingMaster);
    sparkConf.setMaster(streamingMaster);
  }
  // Only for tests, really
  if (sparkConf.getOption("spark.app.name").isEmpty()) {
    String appName = "Oryx" + getLayerName();
    if (id != null) {
      appName = appName + '-' + id;
    }
    log.info("Overriding app name to {} for tests", appName);
    sparkConf.setAppName(appName);
  }
  extraSparkConfig.forEach((key, value) -> sparkConf.setIfMissing(key, value.toString()));

  // Turn this down to prevent long blocking at shutdown
  sparkConf.setIfMissing(
      "spark.streaming.gracefulStopTimeout",
      Long.toString(TimeUnit.MILLISECONDS.convert(generationIntervalSec, TimeUnit.SECONDS)));
  sparkConf.setIfMissing("spark.cleaner.ttl", Integer.toString(20 * generationIntervalSec));
  long generationIntervalMS =
      TimeUnit.MILLISECONDS.convert(generationIntervalSec, TimeUnit.SECONDS);

  JavaSparkContext jsc = JavaSparkContext.fromSparkContext(SparkContext.getOrCreate(sparkConf));
  return new JavaStreamingContext(jsc, new Duration(generationIntervalMS));
}
 
Example 14
Source File: SparkSegmentTarPushJob.java    From incubator-pinot with Apache License 2.0
@Override
public void run()
    throws Exception {
  if (!_enableParallelPush) {
    super.run();
  } else {
    List<Path> segmentPathsToPush = getDataFilePaths(_segmentPattern);
    retainRecentFiles(segmentPathsToPush, _lookBackPeriod);
    List<String> segmentsToPush = new ArrayList<>();
    segmentPathsToPush.forEach(path -> {
      segmentsToPush.add(path.toString());
    });
    JavaSparkContext sparkContext = JavaSparkContext.fromSparkContext(SparkContext.getOrCreate());
    if (_pushJobParallelism == -1) {
      _pushJobParallelism = segmentsToPush.size();
    }
    JavaRDD<String> pathRDD = sparkContext.parallelize(segmentsToPush, _pushJobParallelism);
    pathRDD.foreach(segmentTarPath -> {
      try (ControllerRestApi controllerRestApi = getControllerRestApi()) {
        FileSystem fileSystem = FileSystem.get(new Path(segmentTarPath).toUri(), new Configuration());
        // TODO: Deal with invalid prefixes in the future
        List<String> currentSegments = controllerRestApi.getAllSegments("OFFLINE");
        controllerRestApi.pushSegments(fileSystem, Arrays.asList(new Path(segmentTarPath)));
        if (_deleteExtraSegments) {
          controllerRestApi
              .deleteSegmentUris(getSegmentsToDelete(currentSegments, Arrays.asList(new Path(segmentTarPath))));
        }
      }
    });
  }
}
 
Example 15
Source File: SparkInterpreter.java    From zeppelin with Apache License 2.0
@Override
public void open() throws InterpreterException {
  try {
    SparkConf conf = new SparkConf();
    for (Map.Entry<Object, Object> entry : getProperties().entrySet()) {
      if (!StringUtils.isBlank(entry.getValue().toString())) {
        conf.set(entry.getKey().toString(), entry.getValue().toString());
      }
      // zeppelin.spark.useHiveContext & zeppelin.spark.concurrentSQL are legacy zeppelin
      // properties, convert them to spark properties here.
      if (entry.getKey().toString().equals("zeppelin.spark.useHiveContext")) {
        conf.set("spark.useHiveContext", entry.getValue().toString());
      }
      if (entry.getKey().toString().equals("zeppelin.spark.concurrentSQL")
          && entry.getValue().toString().equals("true")) {
        conf.set(SparkStringConstants.SCHEDULER_MODE_PROP_NAME, "FAIR");
      }
    }
    // use local mode for embedded spark mode when spark.master is not found
    if (!conf.contains(SparkStringConstants.MASTER_PROP_NAME)) {
      if (conf.contains("master")) {
        conf.set(SparkStringConstants.MASTER_PROP_NAME, conf.get("master"));
      } else {
        String masterEnv = System.getenv(SparkStringConstants.MASTER_ENV_NAME);
        conf.set(SparkStringConstants.MASTER_PROP_NAME,
                masterEnv == null ? SparkStringConstants.DEFAULT_MASTER_VALUE : masterEnv);
      }
    }
    this.innerInterpreter = loadSparkScalaInterpreter(conf);
    this.innerInterpreter.open();

    sc = this.innerInterpreter.getSparkContext();
    jsc = JavaSparkContext.fromSparkContext(sc);
    sparkVersion = SparkVersion.fromVersionString(sc.version());
    if (enableSupportedVersionCheck && sparkVersion.isUnsupportedVersion()) {
      throw new Exception("This is not officially supported spark version: " + sparkVersion
          + "\nYou can set zeppelin.spark.enableSupportedVersionCheck to false if you really" +
          " want to try this version of spark.");
    }
    sqlContext = this.innerInterpreter.getSqlContext();
    sparkSession = this.innerInterpreter.getSparkSession();

    SESSION_NUM.incrementAndGet();
  } catch (Exception e) {
    LOGGER.error("Fail to open SparkInterpreter", e);
    throw new InterpreterException("Fail to open SparkInterpreter", e);
  }
}
 
Example 16
Source File: AbstractSparkIT.java    From oryx with Apache License 2.0
@BeforeClass
public static void setUp() {
  SparkConf sparkConf = new SparkConf().setMaster("local[*]").setAppName("SparkIT");
  javaSparkContext = JavaSparkContext.fromSparkContext(SparkContext.getOrCreate(sparkConf));
}
 
Example 17
Source File: SparkBroadcastHelper.java    From bpmn.ai with BSD 3-Clause "New" or "Revised" License
public<T> void broadcastVariable(BROADCAST_VARIABLE name,  T varToBroadcast) {
    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(SparkSession.builder().getOrCreate().sparkContext());
    Broadcast<Object> broadcastedVar = jsc.broadcast(varToBroadcast);
    BROADCAST_VARIABLES.put(name, broadcastedVar);
}
 
Example 18
Source File: SparkTableUtil.java    From iceberg with Apache License 2.0
/**
 * Import files from given partitions to an Iceberg table.
 *
 * @param spark a Spark session
 * @param partitions partitions to import
 * @param targetTable an Iceberg table where to import the data
 * @param spec a partition spec
 * @param stagingDir a staging directory to store temporary manifest files
 */
public static void importSparkPartitions(
    SparkSession spark, List<SparkPartition> partitions, Table targetTable, PartitionSpec spec, String stagingDir) {
  Configuration conf = spark.sessionState().newHadoopConf();
  SerializableConfiguration serializableConf = new SerializableConfiguration(conf);
  int parallelism = Math.min(partitions.size(), spark.sessionState().conf().parallelPartitionDiscoveryParallelism());
  int numShufflePartitions = spark.sessionState().conf().numShufflePartitions();
  MetricsConfig metricsConfig = MetricsConfig.fromProperties(targetTable.properties());

  JavaSparkContext sparkContext = JavaSparkContext.fromSparkContext(spark.sparkContext());
  JavaRDD<SparkPartition> partitionRDD = sparkContext.parallelize(partitions, parallelism);

  Dataset<SparkPartition> partitionDS = spark.createDataset(
      partitionRDD.rdd(),
      Encoders.javaSerialization(SparkPartition.class));

  List<ManifestFile> manifests = partitionDS
      .flatMap((FlatMapFunction<SparkPartition, DataFile>) sparkPartition ->
              listPartition(sparkPartition, spec, serializableConf, metricsConfig).iterator(),
          Encoders.javaSerialization(DataFile.class))
      .repartition(numShufflePartitions)
      .map((MapFunction<DataFile, Tuple2<String, DataFile>>) file ->
              Tuple2.apply(file.path().toString(), file),
          Encoders.tuple(Encoders.STRING(), Encoders.javaSerialization(DataFile.class)))
      .orderBy(col("_1"))
      .mapPartitions(
          (MapPartitionsFunction<Tuple2<String, DataFile>, ManifestFile>) fileTuple ->
              buildManifest(serializableConf, spec, stagingDir, fileTuple),
          Encoders.javaSerialization(ManifestFile.class))
      .collectAsList();

  try {
    boolean snapshotIdInheritanceEnabled = PropertyUtil.propertyAsBoolean(
        targetTable.properties(),
        TableProperties.SNAPSHOT_ID_INHERITANCE_ENABLED,
        TableProperties.SNAPSHOT_ID_INHERITANCE_ENABLED_DEFAULT);

    AppendFiles append = targetTable.newAppend();
    manifests.forEach(append::appendManifest);
    append.commit();

    if (!snapshotIdInheritanceEnabled) {
      // delete original manifests as they were rewritten before the commit
      deleteManifests(targetTable.io(), manifests);
    }
  } catch (Throwable e) {
    deleteManifests(targetTable.io(), manifests);
    throw e;
  }
}
 
Example 19
Source File: ChronixSparkContext.java    From chronix.spark with Apache License 2.0
/**
 * Additional constructor for a ChronixSparkContext to be
 * constructed from a plain SparkContext, for convenience when
 * used in Apache Zeppelin.
 *
 * @param sc an initialized SparkContext
 */
public ChronixSparkContext(SparkContext sc) {
    this.jsc = JavaSparkContext.fromSparkContext(sc);
}